Bugzilla – Attachment 123504 Details for Bug 95198 – Shadow of Mordor beta has missing geometry with gl 4.3
R600_DEBUG=ps,vs,tcs,tes,gs steam's output.
out.txt (text/plain), 13.90 MB, created by John on 2016-05-05 19:48:48 UTC
Description: R600_DEBUG=ps,vs,tcs,tes,gs steam's output.
Filename: out.txt
MIME Type: text/plain
Creator: John
Created: 2016-05-05 19:48:48 UTC
Size: 13.90 MB
>SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 >DCL IN[0], COLOR, COLOR >DCL OUT[0], COLOR > 0: MOV OUT[0], IN[0] > 1: END >radeonsi: Compiling shader 1 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { >main_body: > %27 = bitcast float %5 to i32 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 >} > >attributes #0 = { "InitialPSInputAddr"="36983" } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], COLOR >DCL CONST[0..3] >DCL TEMP[0] > 0: MUL TEMP[0], IN[0].xxxx, CONST[0] > 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] > 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] > 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] > 4: MOV OUT[1], IN[1] > 5: END >radeonsi: Compiling shader 2 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x 
i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %13) > %36 = extractelement <4 x float> %35, i32 0 > %37 = extractelement <4 x float> %35, i32 1 > %38 = extractelement <4 x float> %35, i32 2 > %39 = extractelement <4 x float> %35, i32 3 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %14) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = extractelement <4 x float> %42, i32 3 > %47 = fmul float %36, %17 > %48 = fmul float %36, %18 > %49 = fmul float %36, %19 > %50 = fmul float %36, %20 > %51 = fmul float %37, %21 > %52 = fadd float %51, %47 > %53 = fmul float %37, %22 > %54 = fadd float %53, %48 > %55 = fmul float %37, %23 > %56 = fadd float %55, %49 > %57 = fmul float %37, %24 > %58 = fadd float %57, %50 > %59 = fmul float %38, %25 > %60 = fadd float %59, %52 > %61 = fmul float %38, %26 > %62 = fadd float %61, %54 > %63 = fmul float %38, %27 > %64 = fadd float %63, %56 > %65 = fmul float %38, %28 > %66 = fadd float %65, %58 > %67 = fmul float %39, %29 > %68 = fadd float %67, %60 > %69 = fmul float %39, %30 > %70 = fadd float %69, %62 > %71 = fmul float %39, %31 > %72 = fadd float %71, %64 > %73 = fmul float %39, %32 > %74 = fadd float %73, %66 > %75 = and i32 %8, 1 > %76 = icmp eq i32 %75, 0 > br i1 %76, label %endif-block, label %if-true-block > >if-true-block: ; preds = %main_body > %77 = call float @llvm.AMDGPU.clamp.(float %43, float 0.000000e+00, float 1.000000e+00) > %78 = call float @llvm.AMDGPU.clamp.(float %44, float 0.000000e+00, float 1.000000e+00) > %79 = call float @llvm.AMDGPU.clamp.(float %45, float 0.000000e+00, float 1.000000e+00) > %80 = call float @llvm.AMDGPU.clamp.(float %46, float 0.000000e+00, float 1.000000e+00) > br label %endif-block > >endif-block: ; preds = %main_body, %if-true-block > %.06 = phi float [ %77, %if-true-block ], [ %43, %main_body ] > %.05 = phi float [ %78, %if-true-block ], [ %44, %main_body ] > %.04 = phi float [ %79, %if-true-block ], [ %45, %main_body ] > %.0 = phi float [ %80, 
%if-true-block ], [ %46, %main_body ] > %81 = bitcast i32 %11 to float > %82 = insertvalue <{ float, float, float }> undef, float %81, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.06, float %.05, float %.04, float %.0) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) > ret <{ float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 >DCL IN[0], GENERIC[0], CONSTANT >DCL OUT[0], COLOR > 0: MOV OUT[0], IN[0] > 1: END >radeonsi: Compiling shader 3 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) > %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) > %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) > %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) > %27 = bitcast float %5 to i32 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float }> %33 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.constant(i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] > 0: MOV OUT[0], IN[0] > 1: MOV OUT[1], IN[1] > 2: END >radeonsi: Compiling shader 4 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13) > %18 = extractelement <4 x float> %17, i32 0 > %19 = extractelement <4 x float> %17, i32 1 > %20 = extractelement <4 x float> %17, i32 2 > %21 = extractelement <4 x float> %17, i32 3 > %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 > %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14) > %25 = extractelement <4 x float> %24, i32 0 > %26 = extractelement <4 x float> %24, i32 1 > %27 = extractelement <4 x float> %24, i32 2 > %28 = extractelement <4 x float> %24, i32 3 > %29 = bitcast i32 %11 to float > %30 = insertvalue <{ float, float, float }> undef, float %29, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) > ret <{ float, float, float }> %30 >} > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >radeonsi: Compiling shader 5 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, 
float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47 >} > >radeonsi: Compiling shader 6 >Vertex Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs void @main() { >main_body: > ret void undef >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > 
s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 7 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > %20 = call i32 @llvm.SI.packf16(float %6, float %7) > %21 = bitcast i32 %20 to float > %22 = call i32 @llvm.SI.packf16(float %8, float %9) > %23 = bitcast i32 %22 to float > call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef) > ret void >} > >; Function Attrs: nounwind readnone >declare i32 @llvm.SI.packf16(float, float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } >attributes #1 = { nounwind readnone } > > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** 
>SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw 
v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], LINEAR >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT > 0: TEX OUT[0], IN[0], SAMP[0], 2D > 1: END >radeonsi: Compiling shader 8 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) > %34 = bitcast float %32 to i32 > %35 = bitcast float %33 to i32 > %36 = insertelement <2 x i32> undef, i32 %34, i32 0 > %37 = insertelement <2 x i32> %36, i32 %35, i32 1 > %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %39 = extractelement <4 x float> %38, i32 0 > %40 = extractelement <4 x float> %38, i32 1 > %41 = extractelement <4 x float> %38, i32 2 > %42 = extractelement <4 x float> %38, i32 3 > %43 = 
bitcast float %5 to i32 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 > v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 > v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 > image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 72 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], LINEAR >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 3D, FLOAT > 0: TEX OUT[0], IN[0], SAMP[0], 3D > 1: END >radeonsi: Compiling shader 9 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] 
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) > %35 = bitcast float %32 to i32 > %36 = bitcast float %33 to i32 > %37 = bitcast float %34 to i32 > %38 = insertelement <4 x i32> undef, i32 %35, i32 0 > %39 = insertelement <4 x i32> %38, i32 %36, i32 1 > %40 = insertelement <4 x i32> %39, i32 %37, i32 2 > %41 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %40, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %42 = extractelement <4 x float> %41, i32 0 > %43 = extractelement <4 x float> %41, i32 1 > %44 = extractelement <4 x float> %41, i32 2 > %45 = extractelement <4 x float> %41, i32 3 > %46 = bitcast float %5 to i32 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %46, 10 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 11 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %43, 12 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %44, 13 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %45, 14 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Pixel Shader: >Shader main 
disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 > v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 > v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 > v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 > v_interp_p1_f32 v2, v8, 2, 0, [m0] ; C8080208 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v2, [v2], v9, 2, 0, [m0] ; C8090209 > image_sample v[0:3], v[0:3], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 80 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], LINEAR >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, UINT > 0: TEX OUT[0], IN[0], SAMP[0], 2D > 1: END >radeonsi: Compiling shader 10 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) > %34 = bitcast float %32 to i32 > %35 = bitcast float %33 to i32 > %36 = insertelement <2 x i32> undef, i32 %34, i32 0 > %37 = insertelement <2 x i32> %36, i32 %35, i32 1 > %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %39 = extractelement <4 x float> %38, i32 0 > %40 = extractelement <4 x float> %38, i32 1 > %41 = extractelement <4 x float> %38, i32 2 > %42 = extractelement <4 x float> %38, i32 3 > %43 = bitcast float %5 to i32 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, 
float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >radeonsi: Compiling shader 11 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %6, float %7, float %8, float %9) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 > v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 > v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 > image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 12 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > 
%20 = bitcast float %6 to i32 > %21 = icmp ult i32 %20, 65535 > %22 = select i1 %21, i32 %20, i32 65535 > %23 = bitcast float %7 to i32 > %24 = icmp ult i32 %23, 65535 > %25 = select i1 %24, i32 %23, i32 65535 > %26 = bitcast float %8 to i32 > %27 = icmp ult i32 %26, 65535 > %28 = select i1 %27, i32 %26, i32 65535 > %29 = bitcast float %9 to i32 > %30 = icmp ult i32 %29, 65535 > %31 = select i1 %30, i32 %29, i32 65535 > %32 = shl i32 %25, 16 > %33 = or i32 %22, %32 > %34 = bitcast i32 %33 to float > %35 = shl i32 %31, 16 > %36 = or i32 %28, %35 > %37 = bitcast i32 %36 to float > call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %34, float %37, float undef, float undef) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 > v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 > v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 > image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > s_mov_b32 s0, 0xffff ; BE8003FF 0000FFFF > v_min_u32_e32 v1, s0, v1 ; 26020200 > v_min_u32_e32 v0, s0, v0 ; 26000000 > v_lshlrev_b32_e32 v1, 16, v1 ; 34020290 > v_min_u32_e32 v3, s0, v3 ; 26060600 > v_or_b32_e32 v0, v1, v0 ; 38000101 > v_min_u32_e32 v2, s0, v2 ; 26040400 > v_lshlrev_b32_e32 v1, 16, v3 ; 34020690 > v_or_b32_e32 v1, v1, v2 ; 38020501 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 104 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 13 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 9, i32 0, i32 undef, i32 undef, i32 undef, i32 undef) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 32 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], 
GENERIC[0] >DCL TEMP[0], LOCAL > 0: MOV TEMP[0].xy, IN[1].xyxx > 1: MOV OUT[1], TEMP[0] > 2: MOV OUT[0], IN[0] > 3: END >radeonsi: Compiling shader 14 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13) > %18 = extractelement <4 x float> %17, i32 0 > %19 = extractelement <4 x float> %17, i32 1 > %20 = extractelement <4 x float> %17, i32 2 > %21 = extractelement <4 x float> %17, i32 3 > %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 > %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14) > %25 = extractelement <4 x float> %24, i32 0 > %26 = extractelement <4 x float> %24, i32 1 > %27 = bitcast i32 %11 to float > %28 = insertvalue <{ float, float, float }> undef, float %27, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) > ret <{ float, float, float }> %28 >} > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xyz, TEMP[0].xyzz, CONST[1][0].yyyy, CONST[1][0].zzzz > 3: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz > 4: MOV TEMP[0].w, TEMP[0].wwww > 5: LG2 TEMP[2].x, TEMP[1].xxxx > 6: LG2 TEMP[2].y, TEMP[1].yyyy > 7: LG2 TEMP[2].z, TEMP[1].zzzz > 8: MUL TEMP[1].xyz, TEMP[2].xyzz, CONST[1][0].xxxx > 9: EX2 TEMP[2].x, TEMP[1].xxxx > 10: EX2 TEMP[2].y, TEMP[1].yyyy > 11: EX2 TEMP[2].z, TEMP[1].zzzz > 12: MOV TEMP[0].xyz, TEMP[2].xyzx > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 15 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, 
<3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = bitcast float %37 to i32 > %40 = bitcast float %38 to i32 > %41 = insertelement <2 x i32> undef, i32 %39, i32 0 > %42 = insertelement <2 x i32> %41, i32 %40, i32 1 > %43 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %42, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = call float @llvm.fma.f32(float %44, float %26, float %27) > %49 = call float @llvm.fma.f32(float %45, float %26, float %27) > %50 = call float @llvm.fma.f32(float %46, float %26, float %27) > %51 = call float @llvm.AMDGPU.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) > %52 = call float @llvm.AMDGPU.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) > %53 = call float @llvm.AMDGPU.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) > %54 = call float @llvm.log2.f32(float %51) > %55 = call float @llvm.log2.f32(float %52) > %56 = call float @llvm.log2.f32(float %53) > %57 = fmul float %54, %25 > %58 = fmul float %55, %25 > %59 = fmul float %56, %25 > %60 = call float @llvm.exp2.f32(float %57) > %61 = call float @llvm.exp2.f32(float %58) > %62 = call float @llvm.exp2.f32(float %59) > %63 = bitcast float %5 to i32 > %64 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %63, 10 > %65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %64, float %60, 11 > %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %65, float %61, 12 > %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %62, 13 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %47, 14 > %69 = insertvalue <{ i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], CONSTANT >DCL OUT[0], COLOR > 0: MOV OUT[0], IN[0] > 1: END >radeonsi: Compiling shader 16 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) > %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) > %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) > %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) > %27 = bitcast float %5 to i32 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 > %33 = insertvalue <{ i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.constant(i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 > buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 32, 0, 0, 0, v10, v11, v0, v0 ; F800020F 00000B0A > exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 64 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 > s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 > s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_and_b32 s20, s20, s19 ; 87141314 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > image_sample v[0:3], v[0:1], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30000 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v4, s0 ; 7E080200 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, s7, v0, v4 ; D2960000 04120007 > v_fma_f32 v1, s7, v1, v4 ; D2960001 04120207 > v_fma_f32 v2, s7, v2, v4 ; D2960002 04120407 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_log_f32_e32 v0, v0 ; 7E004F00 > v_log_f32_e32 v1, v1 ; 7E024F01 > v_log_f32_e32 v2, v2 ; 7E044F02 > v_mul_f32_e32 v0, s6, v0 ; 10000006 > v_mul_f32_e32 v1, s6, v1 ; 10020206 > v_mul_f32_e32 v2, s6, v2 ; 10040406 > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_exp_f32_e32 v1, v1 ; 7E024B01 > v_exp_f32_e32 v2, v2 ; 7E044B02 >Shader epilog disassembly: > 
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 180 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] INT32 {6, 1, 4, 2} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: UMUL TEMP[0].x, IN[1].xxxx, IMM[0].xxxx > 1: MOV TEMP[0].x, TEMP[0].xxxx > 2: UMAD TEMP[1].x, IN[1].xxxx, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].y, TEMP[1].xxxx > 4: U2F TEMP[0].xy, TEMP[0].xyyy > 5: ADD TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx > 6: F2U TEMP[1].xy, TEMP[0].xyyy > 7: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy > 8: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 9: UARL ADDR[0].x, TEMP[2].xxxx > 10: UARL ADDR[0].x, TEMP[2].xxxx > 11: MOV TEMP[2], CONST[1][ADDR[0].x] > 12: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 13: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 14: UARL ADDR[0].x, TEMP[1].xxxx > 15: UARL ADDR[0].x, TEMP[1].xxxx > 16: MOV TEMP[1], CONST[1][ADDR[0].x] > 17: UMAD TEMP[3], IN[1].xxxx, IMM[0].xxxx, IMM[0].wwzz > 18: U2F TEMP[3], TEMP[3] > 19: ADD TEMP[0], TEMP[3], IMM[1].xyxy > 20: F2U TEMP[0], TEMP[0] > 21: UMUL TEMP[3].x, TEMP[0].zzzz, IMM[2].yyyy > 22: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 23: UARL ADDR[0].x, TEMP[4].xxxx > 24: MOV TEMP[3], CONST[1][ADDR[0].x] > 25: DP4 TEMP[3].x, IN[0], TEMP[3] > 26: UMUL TEMP[4].x, TEMP[0].wwww, IMM[2].yyyy > 27: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 28: UARL ADDR[0].x, TEMP[5].xxxx > 29: MOV TEMP[4], CONST[1][ADDR[0].x] > 30: DP4 TEMP[4].x, IN[0], TEMP[4] > 31: MOV TEMP[3].y, TEMP[4].xxxx > 32: UMUL TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 33: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 34: UARL ADDR[0].x, TEMP[5].xxxx > 35: MOV TEMP[4], CONST[1][ADDR[0].x] > 36: DP4 TEMP[4].x, IN[0], TEMP[4] > 37: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 38: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].zzzz > 39: UARL ADDR[0].x, TEMP[5].xxxx > 40: MOV TEMP[0], CONST[1][ADDR[0].x] > 41: DP4 TEMP[0].x, IN[0], TEMP[0] > 42: MOV TEMP[4].y, TEMP[0].xxxx > 43: MOV TEMP[4].zw, IMM[1].wwzw > 44: MOV OUT[3], TEMP[3] > 45: MOV OUT[1], TEMP[1] > 46: MOV OUT[2], TEMP[2] > 47: MOV OUT[0], TEMP[4] > 48: END >radeonsi: Compiling shader 17 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = bitcast float %27 to i32 > %29 = mul i32 %28, 6 > %30 = bitcast float %27 to i32 > %31 = mul i32 %30, 6 > %32 = or i32 %31, 1 > %33 = uitofp i32 %29 to float > %34 = uitofp i32 %32 to float > %35 = fadd float %33, 0x3FB99999A0000000 > %36 = fadd float %34, 0x3FB99999A0000000 > %37 = fptoui float %35 to i32 > %38 = fptoui float %36 to i32 > %39 = shl i32 %38, 4 > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %39) > %41 = shl i32 %38, 4 > %42 = or i32 %41, 4 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = shl i32 %38, 4 > %45 = or i32 %44, 8 > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %45) > %47 = shl i32 %38, 4 > %48 = or i32 %47, 12 > %49 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %48) > %50 = shl i32 %37, 4 > %51 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %50) > %52 = shl i32 %37, 4 > %53 = or i32 %52, 4 > %54 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %53) > %55 = shl i32 %37, 4 > %56 = or i32 %55, 8 > %57 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %56) > %58 = shl i32 %37, 4 > %59 = or i32 %58, 12 > %60 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %59) > %61 = bitcast float %27 to i32 > %62 = mul i32 %61, 6 > %63 = add i32 %62, 2 > %64 = bitcast float %27 to i32 > %65 = mul i32 %64, 6 > %66 = add i32 %65, 2 > %67 = bitcast float %27 to i32 > %68 = mul i32 %67, 6 > %69 = add i32 %68, 4 > %70 = bitcast float %27 to i32 > %71 = mul i32 %70, 6 > %72 = add i32 %71, 4 > %73 = uitofp i32 %63 to float > %74 = uitofp i32 %66 to float > %75 = uitofp i32 %69 to float > %76 = uitofp i32 %72 to float > %77 = fadd float %73, 0x3FB99999A0000000 > %78 = fadd float %74, 0x3FF19999A0000000 > %79 = fadd float %75, 0x3FB99999A0000000 > %80 = fadd float %76, 0x3FF19999A0000000 > %81 = fptoui float %77 to i32 > %82 = fptoui float %78 to i32 > %83 = fptoui float %79 to i32 > %84 = fptoui float %80 to i32 > %85 = shl i32 %83, 4 > %86 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %85) > %87 = shl i32 %83, 4 > %88 = or i32 %87, 4 > %89 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %88) > %90 = shl i32 %83, 4 > %91 = or i32 %90, 8 > %92 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %91) > %93 = shl i32 %83, 4 > %94 = or i32 %93, 12 > %95 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %94) > %96 = fmul float %20, %86 > %97 = fmul float %21, %89 > %98 = fadd float %96, %97 > %99 = fmul float %22, %92 > %100 = fadd float %98, %99 > %101 = fmul float %23, %95 > %102 = fadd float %100, %101 > %103 = shl i32 %84, 4 > %104 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %103) > %105 = shl i32 %84, 4 > %106 = or i32 %105, 4 > %107 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %106) > %108 = shl i32 %84, 4 > %109 = or i32 %108, 8 > %110 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %109) > %111 = shl i32 %84, 4 > %112 = or i32 %111, 12 > %113 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %112) > %114 = fmul float %20, %104 > %115 = fmul float %21, %107 > %116 = fadd 
float %114, %115 > %117 = fmul float %22, %110 > %118 = fadd float %116, %117 > %119 = fmul float %23, %113 > %120 = fadd float %118, %119 > %121 = shl i32 %81, 4 > %122 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %121) > %123 = shl i32 %81, 4 > %124 = or i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %124) > %126 = shl i32 %81, 4 > %127 = or i32 %126, 8 > %128 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %127) > %129 = shl i32 %81, 4 > %130 = or i32 %129, 12 > %131 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %130) > %132 = fmul float %20, %122 > %133 = fmul float %21, %125 > %134 = fadd float %132, %133 > %135 = fmul float %22, %128 > %136 = fadd float %134, %135 > %137 = fmul float %23, %131 > %138 = fadd float %136, %137 > %139 = shl i32 %82, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %139) > %141 = shl i32 %82, 4 > %142 = or i32 %141, 4 > %143 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %142) > %144 = shl i32 %82, 4 > %145 = or i32 %144, 8 > %146 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %145) > %147 = shl i32 %82, 4 > %148 = or i32 %147, 12 > %149 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %148) > %150 = fmul float %20, %140 > %151 = fmul float %21, %143 > %152 = fadd float %150, %151 > %153 = fmul float %22, %146 > %154 = fadd float %152, %153 > %155 = fmul float %23, %149 > %156 = fadd float %154, %155 > %157 = bitcast i32 %11 to float > %158 = insertvalue <{ float, float, float }> undef, float %157, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %51, float %54, float %57, float %60) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %43, float %46, float %49) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %102, float %120, float %92, float %95) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %138, float %156, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %158 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..4] >DCL TEMP[0..23], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 64, 48} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {32, 0, 0, 0} > 0: MOV TEMP[0].x, IMM[0].xxxx > 1: MOV TEMP[1], CONST[1][0] > 2: MOV TEMP[0].y, -CONST[1][0].xxxx > 3: BGNLOOP :0 > 4: MOV TEMP[1], CONST[1][0] > 5: FSLT TEMP[2].x, CONST[1][0].xxxx, TEMP[0].yyyy > 6: AND TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx > 7: INEG TEMP[4].x, TEMP[3].xxxx > 8: MOV TEMP[0].z, TEMP[4].xxxx > 9: MOV TEMP[5].x, TEMP[4].xxxx > 10: USNE TEMP[6].x, TEMP[4].xxxx, IMM[1].xxxx > 11: UIF TEMP[6].xxxx :0 > 12: BRK > 13: ENDIF > 14: MOV TEMP[7], CONST[1][1] > 15: ADD 
TEMP[8].x, TEMP[0].yyyy, CONST[1][1].xxxx > 16: MOV TEMP[9].x, TEMP[0].xxxx > 17: MOV TEMP[10], CONST[1][0] > 18: MOV TEMP[9].y, -CONST[1][0].yyyy > 19: BGNLOOP :0 > 20: MOV TEMP[11], CONST[1][0] > 21: FSLT TEMP[12].x, CONST[1][0].yyyy, TEMP[9].yyyy > 22: AND TEMP[13].x, TEMP[12].xxxx, IMM[2].xxxx > 23: INEG TEMP[14].x, TEMP[13].xxxx > 24: MOV TEMP[0].z, TEMP[14].xxxx > 25: MOV TEMP[15].x, TEMP[14].xxxx > 26: USNE TEMP[16].x, TEMP[14].xxxx, IMM[1].xxxx > 27: UIF TEMP[16].xxxx :0 > 28: BRK > 29: ENDIF > 30: MOV TEMP[17], CONST[1][1] > 31: ADD TEMP[5].x, TEMP[9].yyyy, CONST[1][1].yyyy > 32: MOV TEMP[8].y, TEMP[5].xxxx > 33: MOV TEMP[18], CONST[1][4] > 34: FMA TEMP[19].xy, TEMP[8].xyyy, CONST[1][4].xyyy, IN[2].xyyy > 35: MOV TEMP[0].w, TEMP[19].yyxy > 36: MOV TEMP[7].xy, TEMP[19].xyyy > 37: MOV TEMP[7].w, IMM[0].xxxx > 38: TXL TEMP[20], TEMP[7], SAMP[0], 2D > 39: MOV TEMP[21], TEMP[20] > 40: MOV TEMP[0].z, TEMP[20].wwww > 41: ADD TEMP[9].x, TEMP[20].wwww, TEMP[9].xxxx > 42: ADD TEMP[22].x, TEMP[9].yyyy, IMM[0].yyyy > 43: MOV TEMP[9].y, TEMP[22].xxxx > 44: ENDLOOP :0 > 45: MOV TEMP[0].x, TEMP[9].xxxx > 46: ADD TEMP[23].x, TEMP[0].yyyy, IMM[0].yyyy > 47: MOV TEMP[0].y, TEMP[23].xxxx > 48: ENDLOOP :0 > 49: MUL TEMP[1].xy, IN[2].xyyy, CONST[1][3].xyyy > 50: MOV TEMP[1].xy, TEMP[1].xyyy > 51: MOV TEMP[1].w, IMM[0].xxxx > 52: TXL TEMP[1], TEMP[1], SAMP[1], 2D > 53: FMA TEMP[2].x, -TEMP[0].xxxx, CONST[1][0].wwww, TEMP[1].wwww > 54: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][0].zzzz > 55: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 56: ADD TEMP[9], -TEMP[1], CONST[1][2] > 57: FMA TEMP[3], TEMP[2].xxxx, TEMP[9], TEMP[1] > 58: ADD TEMP[2].x, -TEMP[2].xxxx, IMM[0].yyyy > 59: MOV TEMP[0].w, TEMP[2].xxxx > 60: MOV TEMP[0].xyz, TEMP[1].wwww > 61: MUL TEMP[2], TEMP[0].wwwz, TEMP[1] > 62: MUL TEMP[0], TEMP[0], TEMP[2] > 63: MOV TEMP[8].xyz, IN[1].xyzx > 64: MOV TEMP[8].w, IMM[0].yyyy > 65: FMA TEMP[1], TEMP[3], TEMP[1].wwww, -TEMP[0] > 66: MUL TEMP[0], TEMP[1], TEMP[8] > 67: MUL TEMP[0], TEMP[0], IN[1].wwww > 68: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 69: MOV OUT[0], TEMP[0] > 70: END >radeonsi: Compiling shader 18 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) > %35 
= call float @llvm.SI.load.const(<16 x i8> %24, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 3 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 7 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %67 = fsub float -0.000000e+00, %25 > %68 = fsub float -0.000000e+00, %26 > br label %LOOP > >LOOP: ; preds = %IF99, %main_body > %temp1.0 = phi float [ %67, %main_body ], [ %134, %IF99 ] > %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp36.0, %IF99 ] > %69 = fcmp olt float %25, %temp1.0 > br i1 %69, label %IF, label %ENDIF > >IF: ; preds = %LOOP > %70 = fmul float %65, %35 > %71 = fmul float %66, %36 > %72 = bitcast float %70 to i32 > %73 = bitcast float %71 to i32 > %74 = insertelement <4 x i32> undef, i32 %72, i32 0 > %75 = insertelement <4 x i32> %74, i32 %73, i32 1 > %76 = insertelement <4 x i32> %75, i32 0, i32 2 > %77 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %76, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = extractelement <4 x float> %77, i32 3 > %82 = fsub float -0.000000e+00, %temp.0 > %83 = call float @llvm.fma.f32(float %82, float %28, float %81) > %84 = fmul float %83, %27 > %85 = call float @llvm.AMDGPU.clamp.(float %84, float 0.000000e+00, float 1.000000e+00) > %86 = fsub float %31, %78 > %87 = fsub float %32, %79 > %88 = fsub float %33, %80 > %89 = fsub float %34, %81 > %90 = call float @llvm.fma.f32(float %85, float %86, float %78) > %91 = call float @llvm.fma.f32(float %85, float %87, 
float %79) > %92 = call float @llvm.fma.f32(float %85, float %88, float %80) > %93 = call float @llvm.fma.f32(float %85, float %89, float %81) > %94 = fsub float 1.000000e+00, %85 > %95 = fmul float %94, %78 > %96 = fmul float %94, %79 > %97 = fmul float %94, %80 > %98 = fmul float %81, %81 > %99 = fmul float %81, %95 > %100 = fmul float %81, %96 > %101 = fmul float %81, %97 > %102 = fmul float %94, %98 > %103 = fsub float -0.000000e+00, %99 > %104 = call float @llvm.fma.f32(float %90, float %81, float %103) > %105 = fsub float -0.000000e+00, %100 > %106 = call float @llvm.fma.f32(float %91, float %81, float %105) > %107 = fsub float -0.000000e+00, %101 > %108 = call float @llvm.fma.f32(float %92, float %81, float %107) > %109 = fsub float -0.000000e+00, %102 > %110 = call float @llvm.fma.f32(float %93, float %81, float %109) > %111 = fmul float %104, %61 > %112 = fmul float %106, %62 > %113 = fmul float %108, %63 > %114 = fmul float %111, %64 > %115 = fmul float %112, %64 > %116 = fmul float %113, %64 > %117 = fmul float %110, %64 > %118 = call float @llvm.fma.f32(float %57, float %117, float %114) > %119 = call float @llvm.fma.f32(float %58, float %117, float %115) > %120 = call float @llvm.fma.f32(float %59, float %117, float %116) > %121 = call float @llvm.fma.f32(float %60, float %117, float %117) > %122 = bitcast float %5 to i32 > %123 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %122, 10 > %124 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %123, float %118, 11 > %125 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %124, float %119, 12 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %125, float %120, 13 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float %121, 14 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128 > >ENDIF: ; preds = %LOOP > %129 = fadd float %temp1.0, %29 > %130 = call float @llvm.fma.f32(float %129, float %37, float %65) > %131 = bitcast float %130 to i32 > %132 = insertelement <4 x i32> undef, i32 %131, i32 0 > br label %LOOP97 > >LOOP97: ; preds = %ENDIF98, %ENDIF > %temp36.0 = phi float [ %temp.0, %ENDIF ], [ %142, %ENDIF98 ] > %temp37.0 = phi float [ %68, %ENDIF ], [ %143, %ENDIF98 ] > %133 = fcmp olt float %26, %temp37.0 > br i1 %133, label %IF99, label %ENDIF98 > >IF99: ; preds = %LOOP97 > %134 = fadd float %temp1.0, 1.000000e+00 > br label %LOOP > >ENDIF98: ; preds = %LOOP97 > %135 = fadd float %temp37.0, %30 > %136 = call float @llvm.fma.f32(float %135, float %38, float %66) > %137 = bitcast float %136 to i32 > %138 = insertelement <4 x i32> %132, i32 %137, i32 1 > %139 = insertelement <4 x i32> %138, 
i32 0, i32 2 > %140 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %139, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 3 > %142 = fadd float %141, %temp36.0 > %143 = fadd float %temp37.0, 1.000000e+00 > br label %LOOP97 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..4] >DCL TEMP[0..23], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 64, 48} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {32, 0, 0, 0} > 0: MOV TEMP[0].x, IMM[0].xxxx > 1: MOV TEMP[1], CONST[1][0] > 2: MOV TEMP[0].y, -CONST[1][0].xxxx > 3: BGNLOOP :0 > 4: MOV TEMP[1], CONST[1][0] > 5: FSLT TEMP[2].x, CONST[1][0].xxxx, TEMP[0].yyyy > 6: AND TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx > 7: INEG TEMP[4].x, TEMP[3].xxxx > 8: MOV TEMP[0].z, TEMP[4].xxxx > 9: MOV TEMP[5].x, TEMP[4].xxxx > 10: USNE TEMP[6].x, TEMP[4].xxxx, IMM[1].xxxx > 11: UIF TEMP[6].xxxx :0 > 12: BRK > 13: ENDIF > 14: MOV TEMP[7], CONST[1][1] > 15: ADD TEMP[8].x, TEMP[0].yyyy, CONST[1][1].xxxx > 16: MOV TEMP[9].x, TEMP[0].xxxx > 17: MOV TEMP[10], CONST[1][0] > 18: MOV TEMP[9].y, -CONST[1][0].yyyy > 19: BGNLOOP :0 > 20: MOV TEMP[11], CONST[1][0] > 21: FSLT TEMP[12].x, CONST[1][0].yyyy, TEMP[9].yyyy > 22: AND TEMP[13].x, TEMP[12].xxxx, IMM[2].xxxx > 23: INEG TEMP[14].x, TEMP[13].xxxx > 24: MOV TEMP[0].z, TEMP[14].xxxx > 25: MOV TEMP[15].x, TEMP[14].xxxx > 26: USNE TEMP[16].x, TEMP[14].xxxx, IMM[1].xxxx > 27: UIF TEMP[16].xxxx :0 > 28: BRK > 29: ENDIF > 30: MOV TEMP[17], CONST[1][1] > 31: ADD TEMP[5].x, TEMP[9].yyyy, CONST[1][1].yyyy > 32: MOV TEMP[8].y, TEMP[5].xxxx > 33: MOV TEMP[18], CONST[1][4] > 34: FMA TEMP[19].xy, TEMP[8].xyyy, CONST[1][4].xyyy, IN[2].xyyy > 35: MOV TEMP[0].w, TEMP[19].yyxy > 36: MOV TEMP[7].xy, TEMP[19].xyyy > 37: MOV TEMP[7].w, IMM[0].xxxx > 38: TXL TEMP[20], TEMP[7], SAMP[0], 2D > 39: MOV TEMP[21], TEMP[20] > 40: MOV TEMP[0].z, TEMP[20].wwww > 41: ADD TEMP[9].x, TEMP[20].wwww, TEMP[9].xxxx > 42: ADD TEMP[22].x, TEMP[9].yyyy, IMM[0].yyyy > 43: MOV TEMP[9].y, TEMP[22].xxxx > 44: ENDLOOP :0 > 45: MOV TEMP[0].x, TEMP[9].xxxx > 46: ADD TEMP[23].x, TEMP[0].yyyy, IMM[0].yyyy > 47: MOV TEMP[0].y, TEMP[23].xxxx > 48: ENDLOOP :0 > 49: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][0].wwww > 50: MUL TEMP[1].xy, IN[2].xyyy, CONST[1][3].xyyy > 51: MOV TEMP[1].xy, TEMP[1].xyyy > 52: MOV 
TEMP[1].w, IMM[0].xxxx > 53: TXL TEMP[1], TEMP[1], SAMP[1], 2D > 54: ADD TEMP[2].x, -TEMP[1].wwww, IMM[0].yyyy > 55: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx > 56: MUL TEMP[2].x, TEMP[0].xxxx, CONST[1][0].zzzz > 57: MOV_SAT TEMP[0].x, TEMP[2].xxxx > 58: MOV TEMP[8].xyz, IN[1].xyzx > 59: MOV TEMP[8].w, IMM[0].yyyy > 60: FMA TEMP[1], CONST[1][2], TEMP[0].xxxx, TEMP[1] > 61: MUL TEMP[0], TEMP[1], TEMP[8] > 62: MUL TEMP[0], TEMP[0], IN[1].wwww > 63: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 64: MOV OUT[0], TEMP[0] > 65: END >radeonsi: Compiling shader 19 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 3 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 7 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %59 = call float 
@llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %67 = fsub float -0.000000e+00, %25 > %68 = fsub float -0.000000e+00, %26 > br label %LOOP > >LOOP: ; preds = %IF99, %main_body > %temp1.0 = phi float [ %67, %main_body ], [ %114, %IF99 ] > %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp36.0, %IF99 ] > %69 = fcmp olt float %25, %temp1.0 > br i1 %69, label %IF, label %ENDIF > >IF: ; preds = %LOOP > %70 = fmul float %temp.0, %28 > %71 = fmul float %65, %35 > %72 = fmul float %66, %36 > %73 = bitcast float %71 to i32 > %74 = bitcast float %72 to i32 > %75 = insertelement <4 x i32> undef, i32 %73, i32 0 > %76 = insertelement <4 x i32> %75, i32 %74, i32 1 > %77 = insertelement <4 x i32> %76, i32 0, i32 2 > %78 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %77, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %79 = extractelement <4 x float> %78, i32 0 > %80 = extractelement <4 x float> %78, i32 1 > %81 = extractelement <4 x float> %78, i32 2 > %82 = extractelement <4 x float> %78, i32 3 > %83 = fsub float 1.000000e+00, %82 > %84 = fmul float %83, %70 > %85 = fmul float %84, %27 > %86 = call float @llvm.AMDGPU.clamp.(float %85, float 0.000000e+00, float 1.000000e+00) > %87 = call float @llvm.fma.f32(float %31, float %86, float %79) > %88 = call float @llvm.fma.f32(float %32, float %86, float %80) > %89 = call float @llvm.fma.f32(float %33, float %86, float %81) > %90 = call float @llvm.fma.f32(float %34, float %86, float %82) > %91 = fmul float %87, %61 > %92 = fmul float %88, %62 > %93 = fmul float %89, %63 > %94 = fmul float %91, %64 > %95 = fmul float %92, %64 > %96 = fmul float %93, %64 > %97 = fmul float %90, %64 > %98 = call float @llvm.fma.f32(float %57, float %97, float %94) > %99 = call float @llvm.fma.f32(float %58, float %97, float %95) > %100 = call float @llvm.fma.f32(float %59, float %97, float %96) > %101 = call float @llvm.fma.f32(float %60, float %97, float %97) > %102 = bitcast float %5 to i32 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %102, 10 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %98, 11 > %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104, float %99, 12 > %106 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105, float %100, 13 > %107 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, float %101, 14 > %108 = insertvalue <{ i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %107, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %108 > >ENDIF: ; preds = %LOOP > %109 = fadd float %temp1.0, %29 > %110 = call float @llvm.fma.f32(float %109, float %37, float %65) > %111 = bitcast float %110 to i32 > %112 = insertelement <4 x i32> undef, i32 %111, i32 0 > br label %LOOP97 > >LOOP97: ; preds = %ENDIF98, %ENDIF > %temp36.0 = phi float [ %temp.0, %ENDIF ], [ %122, %ENDIF98 ] > %temp37.0 = phi float [ %68, %ENDIF ], [ %123, %ENDIF98 ] > %113 = fcmp olt float %26, %temp37.0 > br i1 %113, label %IF99, label %ENDIF98 > >IF99: ; preds = %LOOP97 > %114 = fadd float %temp1.0, 1.000000e+00 > br label %LOOP > >ENDIF98: ; preds = %LOOP97 > %115 = fadd float %temp37.0, %30 > %116 = call float @llvm.fma.f32(float %115, float %38, float %66) > %117 = bitcast float %116 to i32 > %118 = insertelement <4 x i32> %112, i32 %117, i32 1 > %119 = insertelement <4 x i32> %118, i32 0, i32 2 > %120 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %119, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %121 = extractelement <4 x float> %120, i32 3 > %122 = fadd float %121, %temp36.0 > %123 = fadd float %temp37.0, 1.000000e+00 > br label %LOOP97 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..12] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 144, 160, 176} >IMM[2] UINT32 {192, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][9], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][10], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][11], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][12], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV TEMP[0].xy, IN[1].xyxx > 10: MOV OUT[2], IN[2] > 11: MOV OUT[1], TEMP[0] > 12: MOV OUT[0], TEMP[1] > 13: END >radeonsi: Compiling shader 20 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 144) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 148) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 152) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 156) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 160) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 164) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 168) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 172) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 176) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 180) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 184) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 188) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 192) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 196) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 200) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 204) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) > %37 = extractelement <4 x float> %36, i32 0 > %38 = extractelement <4 x float> %36, i32 1 > %39 = extractelement <4 x float> %36, i32 2 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %14) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %15) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = extractelement <4 x float> %47, i32 3 > %52 = fmul float %18, %37 > %53 = fmul float %19, %38 > %54 = fadd float %52, %53 > %55 = fmul float %20, %39 > %56 = fadd float %54, %55 > %57 = fadd float %56, %21 > %58 = fmul float %22, %37 > %59 = fmul float %23, %38 > %60 = fadd float %58, %59 > %61 = fmul float %24, %39 > %62 = fadd float %60, %61 > %63 = fadd float %62, %25 > %64 = fmul float %26, %37 > %65 = fmul float %27, %38 > %66 = fadd float %64, %65 > %67 = fmul float %28, %39 > %68 = fadd float %66, %67 > %69 = fadd float %68, %29 > %70 = fmul float %30, %37 > %71 = fmul float %31, %38 > %72 = fadd float %70, %71 > %73 = fmul float %32, %39 > %74 = fadd float %72, %73 > %75 = fadd float %74, %33 > %76 = bitcast i32 %11 to float > %77 = insertvalue <{ float, float, float }> undef, float %76, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %43, float %44, float %39, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %48, float %49, float %50, float %51) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %63, float %69, float %75) > ret <{ float, float, float }> %77 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: 
nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0], LOCAL > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[1] > 3: MOV OUT[0], TEMP[0] > 4: END >radeonsi: Compiling shader 21 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %38 = bitcast float %32 to i32 > %39 = bitcast float %33 to i32 > %40 = insertelement <2 x i32> undef, i32 %38, i32 0 > %41 = insertelement <2 x i32> %40, i32 %39, i32 1 > %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = extractelement <4 x float> %42, i32 3 > %47 = fmul float %43, %34 > %48 = fmul float %44, %35 > %49 = fmul float %45, %36 > %50 = fmul float %46, %37 > %51 = bitcast float %5 to i32 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 10 > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, 
float }> %52, float %47, 11 > %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %48, 12 > %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %49, 13 > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %50, 14 > %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >radeonsi: Compiling shader 22 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %44, float %46, 20 > %48 = add i32 %15, %12 > %49 = bitcast i32 %48 to float > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %47, float %49, 21 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %50 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[8:11], s[10:11], 0x8 ; C0840B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[12:15], 0 idxen ; E00C2000 80030A05 > buffer_load_format_xyzw v[3:6], v6, s[8:11], 0 idxen ; E00C2000 80020306 > s_buffer_load_dword s12, s[0:3], 0x2d ; C206012D > s_buffer_load_dword s11, s[0:3], 0x2c ; C205812C > s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 > s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 > s_buffer_load_dword s13, s[0:3], 0x2e ; C206812E > s_buffer_load_dword s17, s[0:3], 0x31 ; C2088131 > s_buffer_load_dword s16, s[0:3], 0x24 ; C2080124 > s_buffer_load_dword s7, s[0:3], 0x28 ; C2038128 > s_buffer_load_dword s14, s[0:3], 0x2f ; 
C207012F > s_buffer_load_dword s15, s[0:3], 0x30 ; C2078130 > s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126 > s_buffer_load_dword s9, s[0:3], 0x2a ; C204812A > s_buffer_load_dword s18, s[0:3], 0x32 ; C2090132 > s_buffer_load_dword s6, s[0:3], 0x27 ; C2030127 > s_buffer_load_dword s10, s[0:3], 0x2b ; C205012B > s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > v_mul_f32_e32 v12, s12, v8 ; 1018100C > v_mac_f32_e32 v12, s11, v7 ; 3E180E0B > v_mul_f32_e32 v0, s4, v8 ; 10001004 > v_mul_f32_e32 v1, s8, v8 ; 10021008 > v_mul_f32_e32 v8, s17, v8 ; 10101011 > v_mac_f32_e32 v12, s13, v9 ; 3E18120D > v_mac_f32_e32 v0, s16, v7 ; 3E000E10 > v_mac_f32_e32 v1, s7, v7 ; 3E020E07 > v_mac_f32_e32 v8, s15, v7 ; 3E100E0F > v_add_f32_e32 v7, s14, v12 ; 060E180E > v_mov_b32_e32 v12, 1.0 ; 7E1802F2 > v_mac_f32_e32 v0, s5, v9 ; 3E001205 > v_mac_f32_e32 v1, s9, v9 ; 3E021209 > v_mac_f32_e32 v8, s18, v9 ; 3E101212 > exp 15, 32, 0, 0, 0, v10, v11, v9, v12 ; F800020F 0C090B0A > v_add_f32_e32 v0, s6, v0 ; 06000006 > v_add_f32_e32 v1, s10, v1 ; 0602020A > v_add_f32_e32 v8, s0, v8 ; 06101000 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 33, 0, 0, 0, v3, v4, v5, v6 ; F800021F 06050403 > exp 15, 12, 0, 1, 0, v0, v1, v7, v8 ; F80008CF 08070100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 232 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 3, 1, [m0] ; C81C0702 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v7, [v7], v3, 3, 1, [m0] ; C81D0703 > image_sample v[8:11], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030800 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v4, v8 ; 10001104 > v_mul_f32_e32 v1, v5, v9 ; 10021305 > v_mul_f32_e32 v2, v6, v10 ; 10041506 > v_mul_f32_e32 v3, v7, v11 ; 10061707 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 120 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT 
>DCL CONST[1][0..1] >DCL TEMP[0..18], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {1, 0, 0, 0} > 0: MOV TEMP[0], IMM[0].xxxx > 1: MOV TEMP[1], CONST[1][0] > 2: MOV TEMP[1].x, -CONST[1][0].xxxx > 3: BGNLOOP :0 > 4: MOV TEMP[2], CONST[1][0] > 5: FSLT TEMP[3].x, CONST[1][0].xxxx, TEMP[1].xxxx > 6: AND TEMP[4].x, TEMP[3].xxxx, IMM[2].xxxx > 7: INEG TEMP[5].x, TEMP[4].xxxx > 8: MOV TEMP[1].z, TEMP[5].xxxx > 9: MOV TEMP[6].x, TEMP[5].xxxx > 10: USNE TEMP[7].x, TEMP[5].xxxx, IMM[1].xxxx > 11: UIF TEMP[7].xxxx :0 > 12: BRK > 13: ENDIF > 14: MOV TEMP[2], TEMP[0] > 15: MOV TEMP[8], CONST[1][0] > 16: MOV TEMP[1].y, -CONST[1][0].yyyy > 17: BGNLOOP :0 > 18: MOV TEMP[9], CONST[1][0] > 19: FSLT TEMP[10].x, CONST[1][0].yyyy, TEMP[1].yyyy > 20: AND TEMP[11].x, TEMP[10].xxxx, IMM[2].xxxx > 21: INEG TEMP[12].x, TEMP[11].xxxx > 22: MOV TEMP[1].z, TEMP[12].xxxx > 23: MOV TEMP[13].x, TEMP[12].xxxx > 24: USNE TEMP[14].x, TEMP[12].xxxx, IMM[1].xxxx > 25: UIF TEMP[14].xxxx :0 > 26: BRK > 27: ENDIF > 28: MOV TEMP[15], CONST[1][1] > 29: FMA TEMP[16].xy, TEMP[1].xyyy, CONST[1][1].xyyy, IN[2].xyyy > 30: MOV TEMP[1].zw, TEMP[16].yyxy > 31: MOV TEMP[17].xy, TEMP[16].xyyy > 32: MOV TEMP[17].w, IMM[0].xxxx > 33: TXL TEMP[6], TEMP[17], SAMP[0], 2D > 34: ADD TEMP[2], TEMP[2], TEMP[6] > 35: ADD TEMP[18].x, TEMP[1].yyyy, IMM[0].yyyy > 36: MOV TEMP[1].y, TEMP[18].xxxx > 37: ENDLOOP :0 > 38: MOV TEMP[0], TEMP[2] > 39: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 40: ENDLOOP :0 > 41: MUL TEMP[0], TEMP[0], CONST[1][0].wwww > 42: MOV TEMP[1].xyz, IN[1].xyzx > 43: MOV TEMP[1].w, IMM[0].yyyy > 44: MUL TEMP[0], TEMP[0], TEMP[1] > 45: MUL TEMP[0], TEMP[0], IN[1].wwww > 46: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 47: MOV OUT[0], TEMP[0] > 48: END >radeonsi: Compiling shader 23 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %30 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 > %32 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %33 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %32, i64 0, i64 3 > %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 > %35 = extractelement <8 x i32> %31, i32 7 > %36 = extractelement <4 x i32> %34, i32 0 > %37 = and i32 %36, %35 > %38 = insertelement <4 x i32> %34, i32 %37, i32 0 > %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 
%6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %49 = fsub float -0.000000e+00, %25 > %50 = fsub float -0.000000e+00, %26 > br label %LOOP > >LOOP: ; preds = %IF79, %main_body > %temp4.0 = phi float [ %49, %main_body ], [ %78, %IF79 ] > %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.0, %IF79 ] > %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp10.0, %IF79 ] > %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %temp9.0, %IF79 ] > %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp8.0, %IF79 ] > %51 = fcmp olt float %25, %temp4.0 > br i1 %51, label %IF, label %ENDIF > >IF: ; preds = %LOOP > %52 = fmul float %temp.0, %27 > %53 = fmul float %temp1.0, %27 > %54 = fmul float %temp2.0, %27 > %55 = fmul float %temp3.0, %27 > %56 = fmul float %52, %43 > %57 = fmul float %53, %44 > %58 = fmul float %54, %45 > %59 = fmul float %56, %46 > %60 = fmul float %57, %46 > %61 = fmul float %58, %46 > %62 = fmul float %55, %46 > %63 = call float @llvm.fma.f32(float %39, float %62, float %59) > %64 = call float @llvm.fma.f32(float %40, float %62, float %60) > %65 = call float @llvm.fma.f32(float %41, float %62, float %61) > %66 = call float @llvm.fma.f32(float %42, float %62, float %62) > %67 = bitcast float %5 to i32 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %67, 10 > %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %63, 11 > %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %64, 12 > %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %65, 13 > %72 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71, float %66, 14 > %73 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %72, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %73 > >ENDIF: ; preds = %LOOP > %74 = call float @llvm.fma.f32(float %temp4.0, float %28, float %47) > %75 = bitcast float %74 to i32 > %76 = insertelement <4 x i32> undef, i32 %75, i32 0 > br label %LOOP77 > >LOOP77: ; preds = %ENDIF78, %ENDIF > %temp11.0 = phi float [ %temp3.0, %ENDIF ], [ %91, %ENDIF78 ] > %temp10.0 = 
phi float [ %temp2.0, %ENDIF ], [ %90, %ENDIF78 ] > %temp9.0 = phi float [ %temp1.0, %ENDIF ], [ %89, %ENDIF78 ] > %temp8.0 = phi float [ %temp.0, %ENDIF ], [ %88, %ENDIF78 ] > %temp5.0 = phi float [ %50, %ENDIF ], [ %92, %ENDIF78 ] > %77 = fcmp olt float %26, %temp5.0 > br i1 %77, label %IF79, label %ENDIF78 > >IF79: ; preds = %LOOP77 > %78 = fadd float %temp4.0, 1.000000e+00 > br label %LOOP > >ENDIF78: ; preds = %LOOP77 > %79 = call float @llvm.fma.f32(float %temp5.0, float %29, float %48) > %80 = bitcast float %79 to i32 > %81 = insertelement <4 x i32> %76, i32 %80, i32 1 > %82 = insertelement <4 x i32> %81, i32 0, i32 2 > %83 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %82, <8 x i32> %31, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = extractelement <4 x float> %83, i32 3 > %88 = fadd float %temp8.0, %84 > %89 = fadd float %temp9.0, %85 > %90 = fadd float %temp10.0, %86 > %91 = fadd float %temp11.0, %87 > %92 = fadd float %temp5.0, 1.000000e+00 > br label %LOOP77 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 3: MOV OUT[1], TEMP[1] > 4: MOV OUT[0], TEMP[0] > 5: END >radeonsi: Compiling shader 24 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %13) > %17 = extractelement <4 x float> %16, i32 0 > %18 = extractelement <4 x float> %16, i32 1 > %19 = call float @llvm.fma.f32(float %17, float 5.000000e-01, float 5.000000e-01) > %20 = call float @llvm.fma.f32(float %18, float -5.000000e-01, float 5.000000e-01) > %21 = bitcast i32 %11 to float > %22 = insertvalue <{ float, float, float }> undef, float %21, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %17, float %18, float 0.000000e+00, float 1.000000e+00) > ret <{ 
float, float, float }> %22 >} > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 96, 112, 80} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D > 2: FSLT TEMP[1].x, CONST[1][6].xxxx, TEMP[0].wwww > 3: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 4: INEG TEMP[1].x, TEMP[1].xxxx > 5: MOV TEMP[2].y, TEMP[1].xxxx > 6: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][6].yyyy > 7: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 8: INEG TEMP[0].x, TEMP[0].xxxx > 9: AND TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx > 10: MOV TEMP[2].x, TEMP[0].xxxx > 11: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 12: UIF TEMP[0].xxxx :0 > 13: MOV TEMP[0].xy, IN[0].xyyy > 14: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 15: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[0].xyzz > 16: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[1][7].xxxx > 17: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[1][5].xxxx > 18: ELSE :0 > 19: MOV TEMP[2].xyz, IMM[2].xxxx > 20: ENDIF > 21: MOV TEMP[0].xyz, TEMP[2].xyzx > 22: MOV TEMP[0].w, IMM[2].yyyy > 23: MOV OUT[0], TEMP[0] > 24: END >radeonsi: Compiling shader 25 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 
> %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 7 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %45 = bitcast float %43 to i32 > %46 = bitcast float %44 to i32 > %47 = insertelement <2 x i32> undef, i32 %45, i32 0 > %48 = insertelement <2 x i32> %47, i32 %46, i32 1 > %49 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %48, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %50 = extractelement <4 x float> %49, i32 3 > %51 = fcmp olt float %26, %50 > %52 = fcmp olt float %50, %27 > %53 = and i1 %52, %51 > br i1 %53, label %IF, label %ENDIF > >IF: ; preds = %main_body > %54 = extractelement <4 x i32> %42, i32 0 > %55 = extractelement <8 x i32> %39, i32 7 > %56 = and i32 %54, %55 > %57 = insertelement <4 x i32> %42, i32 %56, i32 0 > %58 = bitcast float %43 to i32 > %59 = bitcast float %44 to i32 > %60 = insertelement <2 x i32> undef, i32 %58, i32 0 > %61 = insertelement <2 x i32> %60, i32 %59, i32 1 > %62 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %61, <8 x i32> %39, <4 x i32> %57, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %63 = extractelement <4 x float> %62, i32 0 > %64 = extractelement <4 x float> %62, i32 1 > %65 = extractelement <4 x float> %62, i32 2 > %66 = extractelement <4 x float> %62, i32 3 > %67 = fmul float %66, %63 > %68 = fmul float %66, %64 > %69 = fmul float %66, %65 > %70 = fmul float %67, %28 > %71 = fmul float %68, %28 > %72 = fmul float %69, %28 > %73 = fmul float %70, %25 > %74 = fmul float %71, %25 > %75 = fmul float %72, %25 > br label %ENDIF > >ENDIF: ; preds = %main_body, %IF > %temp8.0 = phi float [ %73, %IF ], [ 0.000000e+00, %main_body ] > %temp9.0 = phi float [ %74, %IF ], [ 0.000000e+00, %main_body ] > %temp10.0 = phi float [ %75, %IF ], [ 0.000000e+00, %main_body ] > %76 = bitcast float %5 to i32 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %76, 10 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %temp8.0, 11 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %temp9.0, 12 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %temp10.0, 13 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float 1.000000e+00, 14 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..6] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 96, 0, 0} >IMM[1] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, -1.0000} >IMM[2] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, CONST[1][6].xyyy, IMM[1].xxxx > 1: RCP TEMP[1].x, CONST[1][6].xxxx > 2: RCP TEMP[1].y, CONST[1][6].yyyy > 3: UCMP TEMP[0].xy, TEMP[0].xyyy, IMM[1].yyyy, TEMP[1].xyyy > 4: ADD TEMP[1].xy, -TEMP[0].xyyy, IN[0].xyyy > 5: MOV TEMP[1].xy, TEMP[1].xyyy > 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 7: FMA TEMP[2], TEMP[0].xyxy, IMM[1].zwwz, IN[0].xyxy > 8: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy > 9: MOV TEMP[3].xy, TEMP[2].xyyy > 10: TEX TEMP[3], TEMP[3], SAMP[0], 2D > 11: ADD TEMP[1], TEMP[1], TEMP[3] > 12: MOV TEMP[2].xy, TEMP[2].zwww > 13: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 14: ADD TEMP[1], TEMP[2], TEMP[1] > 15: MOV TEMP[2].xy, TEMP[0].xyyy > 16: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 17: ADD TEMP[0], TEMP[2], TEMP[1] > 18: MUL TEMP[0], TEMP[0], IMM[2].xxxx > 19: MOV OUT[0], TEMP[0] > 20: END >radeonsi: Compiling shader 26 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call 
float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %38 = fcmp oeq float %25, 0.000000e+00 > %39 = fcmp oeq float %26, 0.000000e+00 > %40 = fdiv float 1.000000e+00, %25 > %41 = fdiv float 1.000000e+00, %26 > %42 = select i1 %38, float 0x4600000000000000, float %40 > %43 = select i1 %39, float 0x4600000000000000, float %41 > %44 = fsub float %36, %42 > %45 = fsub float %37, %43 > %46 = bitcast float %44 to i32 > %47 = bitcast float %45 to i32 > %48 = insertelement <2 x i32> undef, i32 %46, i32 0 > %49 = insertelement <2 x i32> %48, i32 %47, i32 1 > %50 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %49, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = call float @llvm.fma.f32(float %42, float 1.000000e+00, float %36) > %56 = call float @llvm.fma.f32(float %43, float -1.000000e+00, float %37) > %57 = call float @llvm.fma.f32(float %42, float -1.000000e+00, float %36) > %58 = call float @llvm.fma.f32(float %43, float 1.000000e+00, float %37) > %59 = fadd float %42, %36 > %60 = fadd float %43, %37 > %61 = bitcast float %55 to i32 > %62 = bitcast float %56 to i32 > %63 = insertelement <2 x i32> undef, i32 %61, i32 0 > %64 = insertelement <2 x i32> %63, i32 %62, i32 1 > %65 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %64, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %66 = extractelement <4 x float> %65, i32 0 > %67 = extractelement <4 x float> %65, i32 1 > %68 = extractelement <4 x float> %65, i32 2 > %69 = extractelement <4 x float> %65, i32 3 > %70 = fadd float %51, %66 > %71 = fadd float %52, %67 > %72 = fadd float %53, %68 > %73 = fadd float %54, %69 > %74 = bitcast float %57 to i32 > %75 = bitcast float %58 to i32 > %76 = insertelement <2 x i32> undef, i32 %74, i32 0 > %77 = insertelement <2 x i32> %76, i32 %75, i32 1 > %78 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %77, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %79 = extractelement <4 x float> %78, i32 0 > %80 = extractelement <4 x float> %78, i32 1 > %81 = extractelement <4 x float> %78, i32 2 > %82 = extractelement <4 x float> %78, i32 3 > %83 = fadd float %79, %70 > %84 = fadd float %80, %71 > %85 = fadd float %81, %72 > %86 = fadd float %82, %73 > %87 = bitcast float %59 to i32 > %88 = bitcast float %60 to i32 > %89 = insertelement <2 x i32> undef, i32 %87, i32 0 > %90 = insertelement <2 x i32> %89, i32 %88, i32 1 > %91 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %90, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %92 = extractelement <4 x float> %91, i32 0 > %93 = extractelement <4 x float> %91, i32 1 > %94 = extractelement <4 x float> %91, i32 2 > %95 = extractelement <4 x float> %91, i32 3 > %96 = fadd float %92, %83 > %97 = fadd float %93, %84 > %98 = fadd float %94, %85 > %99 = fadd float %95, %86 > %100 = fmul float %96, 2.500000e-01 > %101 = fmul float %97, 2.500000e-01 > %102 = fmul float %98, 2.500000e-01 > %103 = fmul float %99, 2.500000e-01 > %104 = bitcast float %5 to i32 > %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float }> undef, i32 %104, 10 > %106 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105, float %100, 11 > %107 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, float %101, 12 > %108 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %107, float %102, 13 > %109 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %108, float %103, 14 > %110 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %109, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %110 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..5] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} >IMM[1] UINT32 {0, 80, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, CONST[1][5].xyyy, IMM[0].yzzz > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][5].xyyy > 4: MOV TEMP[1].zw, IMM[0].yyxy > 5: MOV OUT[1], TEMP[1] > 6: MOV OUT[0], TEMP[0] > 7: END >radeonsi: Compiling shader 27 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 80) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 84) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = fsub float -0.000000e+00, %17 > %24 = call float 
@llvm.fma.f32(float %21, float %16, float %16) > %25 = call float @llvm.fma.f32(float %22, float %23, float %17) > %26 = bitcast i32 %11 to float > %27 = insertvalue <{ float, float, float }> undef, float %26, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float 0.000000e+00, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %27 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..6] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.0000, 0.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 96, 1259902592, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: FSLT TEMP[1].x, IMM[0].xxxx, TEMP[0].xxxx > 3: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 4: INEG TEMP[1].x, TEMP[1].xxxx > 5: FSNE TEMP[2].x, TEMP[0].xxxx, IMM[0].xxxx > 6: UIF TEMP[2].xxxx :0 > 7: RCP TEMP[0].x, TEMP[0].xxxx > 8: MUL TEMP[0].x, CONST[1][6].xxxx, TEMP[0].xxxx > 9: ELSE :0 > 10: SSG TEMP[2].x, CONST[1][6].xxxx > 11: MUL TEMP[0].x, IMM[0].yyyy, TEMP[2].xxxx > 12: ENDIF > 13: USNE TEMP[2].x, TEMP[1].xxxx, IMM[2].xxxx > 14: UIF TEMP[2].xxxx :0 > 15: MOV TEMP[2].x, TEMP[0].xxxx > 16: ELSE :0 > 17: MOV TEMP[2].x, IMM[2].zzzz > 18: ENDIF > 19: MOV TEMP[2].x, TEMP[2].xxxx > 20: USNE TEMP[3].x, TEMP[1].xxxx, IMM[2].xxxx > 21: UIF TEMP[3].xxxx :0 > 22: MOV TEMP[3].x, TEMP[0].xxxx > 23: ELSE :0 > 24: MOV TEMP[3].x, IMM[2].zzzz > 25: ENDIF > 26: MOV TEMP[2].y, TEMP[3].xxxx > 27: USNE TEMP[3].x, TEMP[1].xxxx, IMM[2].xxxx > 28: UIF TEMP[3].xxxx :0 > 29: MOV TEMP[3].x, TEMP[0].xxxx > 30: ELSE :0 > 31: MOV TEMP[3].x, IMM[2].zzzz > 32: ENDIF > 33: MOV TEMP[2].z, TEMP[3].xxxx > 34: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 35: UIF TEMP[1].xxxx :0 > 36: MOV TEMP[0].x, TEMP[0].xxxx > 37: ELSE :0 > 38: MOV TEMP[0].x, IMM[2].zzzz > 39: ENDIF > 40: MOV TEMP[2].w, TEMP[0].xxxx > 41: MOV OUT[0], TEMP[2] > 42: END >radeonsi: Compiling shader 28 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x 
<16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 > %28 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %29 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %28, i64 0, i64 3 > %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 > %31 = extractelement <8 x i32> %27, i32 7 > %32 = extractelement <4 x i32> %30, i32 0 > %33 = and i32 %32, %31 > %34 = insertelement <4 x i32> %30, i32 %33, i32 0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = bitcast float %35 to i32 > %38 = bitcast float %36 to i32 > %39 = insertelement <2 x i32> undef, i32 %37, i32 0 > %40 = insertelement <2 x i32> %39, i32 %38, i32 1 > %41 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %40, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %42 = extractelement <4 x float> %41, i32 0 > %43 = fcmp ogt float %42, 0.000000e+00 > %44 = fcmp une float %42, 0.000000e+00 > br i1 %44, label %IF, label %ELSE > >IF: ; preds = %main_body > %45 = fdiv float 1.000000e+00, %42 > %46 = fmul float %25, %45 > br label %ENDIF > >ELSE: ; preds = %main_body > %47 = fcmp ogt float %25, 0.000000e+00 > %48 = select i1 %47, float 1.000000e+00, float %25 > %49 = fcmp oge float %48, 0.000000e+00 > %.op = fmul float %48, 0x4600000000000000 > %50 = select i1 %49, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp.0 = phi float [ %46, %IF ], [ %50, %ELSE ] > %temp.0. 
= select i1 %43, float %temp.0, float 1.000000e+07 > %temp12.0 = select i1 %43, float %temp.0, float 1.000000e+07 > %temp.0.28 = select i1 %43, float %temp.0, float 1.000000e+07 > %temp.1 = select i1 %43, float %temp.0, float 1.000000e+07 > %51 = bitcast float %5 to i32 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 10 > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %temp.0., 11 > %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %temp12.0, 12 > %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %temp.0.28, 13 > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %temp.1, 14 > %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..6] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 96, 0, 0} >IMM[1] FLT32 { 0.3333, 255.0000, 1.0000, 0.0039} >IMM[2] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][6].xxxx > 4: MOV TEMP[1].xy, IN[0].xyyy > 5: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 6: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1].xyzz > 7: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][6].xxxx, TEMP[0].xyzz > 8: MOV TEMP[2].xy, IN[0].xyyy > 9: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 10: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 11: FMA TEMP[2].xyz, TEMP[1].xyzz, CONST[1][6].xxxx, TEMP[0].xyzz > 12: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[1].xxxx > 13: MAX TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx > 14: MAX TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx > 15: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][6].yyyy > 
16: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy > 17: CEIL TEMP[2].x, TEMP[2].xxxx > 18: MAX TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz > 19: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 20: MUL TEMP[1].x, TEMP[2].xxxx, CONST[1][6].xxxx > 21: MOV TEMP[2].w, TEMP[2].xxxx > 22: FSEQ TEMP[3].xyz, TEMP[1].xxxx, IMM[2].xxxx > 23: SSG TEMP[4].xyz, TEMP[0].xyzz > 24: MUL TEMP[4].xyz, IMM[2].yyyy, TEMP[4].xyzz > 25: RCP TEMP[1].xyz, TEMP[1].xxxx > 26: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 27: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz > 28: MOV OUT[0], TEMP[2] > 29: END >radeonsi: Compiling shader 29 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = bitcast float %54 to i32 > %57 = bitcast float %55 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, 
i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %64, %61 > %66 = fmul float %64, %62 > %67 = fmul float %64, %63 > %68 = fmul float %65, %25 > %69 = fmul float %66, %25 > %70 = fmul float %67, %25 > %71 = bitcast float %54 to i32 > %72 = bitcast float %55 to i32 > %73 = insertelement <2 x i32> undef, i32 %71, i32 0 > %74 = insertelement <2 x i32> %73, i32 %72, i32 1 > %75 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %74, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = extractelement <4 x float> %75, i32 3 > %80 = fmul float %79, %76 > %81 = fmul float %79, %77 > %82 = fmul float %79, %78 > %83 = call float @llvm.fma.f32(float %80, float %25, float %68) > %84 = call float @llvm.fma.f32(float %81, float %25, float %69) > %85 = call float @llvm.fma.f32(float %82, float %25, float %70) > %86 = bitcast float %54 to i32 > %87 = bitcast float %55 to i32 > %88 = insertelement <2 x i32> undef, i32 %86, i32 0 > %89 = insertelement <2 x i32> %88, i32 %87, i32 1 > %90 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %89, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %94, %91 > %96 = fmul float %94, %92 > %97 = fmul float %94, %93 > %98 = call float @llvm.fma.f32(float %95, float %25, float %83) > %99 = call float @llvm.fma.f32(float %96, float %25, float %84) > %100 = call float @llvm.fma.f32(float %97, float %25, float %85) > %101 = fmul float %98, 0x3FD5555560000000 > %102 = fmul float %99, 0x3FD5555560000000 > %103 = fmul float %100, 0x3FD5555560000000 > %104 = call float @llvm.maxnum.f32(float %102, float %101) > %105 = call float @llvm.maxnum.f32(float %103, float %104) > %106 = fmul float %105, %26 > %107 = fmul float %106, 2.550000e+02 > %108 = call float @llvm.ceil.f32(float %107) > %109 = call float @llvm.maxnum.f32(float %108, float 1.000000e+00) > %110 = fmul float %109, 0x3F70101020000000 > %111 = fmul float %110, %25 > %112 = fcmp oeq float %111, 0.000000e+00 > %113 = fcmp oeq float %111, 0.000000e+00 > %114 = fcmp oeq float %111, 0.000000e+00 > %115 = fcmp ogt float %101, 0.000000e+00 > %116 = select i1 %115, float 1.000000e+00, float %101 > %117 = fcmp oge float %116, 0.000000e+00 > %118 = fcmp ogt float %102, 0.000000e+00 > %119 = select i1 %118, float 1.000000e+00, float %102 > %120 = fcmp oge float %119, 0.000000e+00 > %121 = fcmp ogt float %103, 0.000000e+00 > %122 = select i1 %121, float 1.000000e+00, float %103 > %123 = fcmp oge float %122, 0.000000e+00 > %.op = fmul float %116, 0x4600000000000000 > %124 = select i1 %117, float %.op, float 0xC600000000000000 > %.op20 = fmul float %119, 0x4600000000000000 > %125 = select i1 %120, float %.op20, float 0xC600000000000000 > %.op21 = fmul float %122, 0x4600000000000000 > %126 = select i1 %123, float %.op21, float 0xC600000000000000 > %127 = fdiv float 1.000000e+00, %111 > %128 
= fmul float %101, %127 > %129 = fmul float %102, %127 > %130 = fmul float %103, %127 > %131 = select i1 %112, float %124, float %128 > %132 = select i1 %113, float %125, float %129 > %133 = select i1 %114, float %126, float %130 > %134 = bitcast float %5 to i32 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %134, 10 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %131, 11 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %132, 12 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %133, 13 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %110, 14 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..6] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} >IMM[1] UINT32 {0, 96, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, CONST[1][6].yzzz, IMM[0].yzzz > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][6].yzzz > 4: MOV OUT[1], TEMP[1] > 5: MOV OUT[0], TEMP[0] > 6: END >radeonsi: Compiling shader 30 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x 
i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 104) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = fsub float -0.000000e+00, %17 > %24 = call float @llvm.fma.f32(float %21, float %16, float %16) > %25 = call float @llvm.fma.f32(float %22, float %23, float %17) > %26 = bitcast i32 %11 to float > %27 = insertvalue <{ float, float, float }> undef, float %26, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %27 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 3D, FLOAT >DCL SVIEW[2], 3D, FLOAT >DCL CONST[1][0..6] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 80, 96, 0} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xyz, TEMP[0].xyzz, CONST[1][5].zzzz, CONST[1][5].wwww > 3: FMA TEMP[0].xyz, TEMP[0].xyzz, CONST[1][5].xxxx, CONST[1][5].yyyy > 4: MOV TEMP[0].xyz, TEMP[0].xyzz > 5: MOV TEMP[0].w, IMM[1].xxxx > 6: TXL TEMP[0].xyz, TEMP[0], SAMP[1], 3D > 7: MOV TEMP[2].xyz, TEMP[1].xyzz > 8: MOV TEMP[2].w, IMM[1].xxxx > 9: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 3D > 10: ADD TEMP[1].xyz, -TEMP[0].xyzz, TEMP[2].xyzz > 11: FMA TEMP[0].xyz, CONST[1][6].xxxx, TEMP[1].xyzz, TEMP[0].xyzz > 12: MOV TEMP[0].w, IMM[1].yyyy > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 31 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, 
!tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %30 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 > %32 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %33 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %32, i64 0, i64 3 > %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 > %35 = extractelement <8 x i32> %31, i32 7 > %36 = extractelement <4 x i32> %34, i32 0 > %37 = and i32 %36, %35 > %38 = insertelement <4 x i32> %34, i32 %37, i32 0 > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 7 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 11 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %59 = bitcast float %57 to i32 > %60 = bitcast float %58 to i32 > %61 = insertelement <2 x i32> undef, i32 %59, i32 0 > %62 = insertelement <2 x i32> %61, i32 %60, i32 1 > %63 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %62, <8 x i32> %31, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %64 = extractelement <4 x float> %63, i32 0 > %65 = extractelement <4 x float> %63, i32 1 > %66 = extractelement <4 x float> %63, i32 2 > %67 = call float @llvm.fma.f32(float %64, float %27, float %28) > %68 = call float @llvm.fma.f32(float %65, float %27, float %28) > %69 = call float @llvm.fma.f32(float %66, float %27, float %28) > %70 = call float @llvm.fma.f32(float %64, float %25, float %26) > %71 = call float @llvm.fma.f32(float %65, float %25, float %26) > %72 = call float @llvm.fma.f32(float %66, float %25, float %26) > %73 = bitcast float %70 to i32 > %74 = bitcast float %71 to i32 > %75 = bitcast float %72 to i32 > %76 = insertelement <4 x i32> undef, i32 %73, i32 0 > %77 = insertelement <4 x i32> %76, i32 %74, i32 1 > %78 = insertelement <4 x i32> %77, i32 %75, i32 2 > %79 = insertelement <4 x i32> %78, i32 0, i32 3 > %80 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %79, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %81 = extractelement <4 x float> %80, i32 0 > %82 = 
extractelement <4 x float> %80, i32 1 > %83 = extractelement <4 x float> %80, i32 2 > %84 = bitcast float %67 to i32 > %85 = bitcast float %68 to i32 > %86 = bitcast float %69 to i32 > %87 = insertelement <4 x i32> undef, i32 %84, i32 0 > %88 = insertelement <4 x i32> %87, i32 %85, i32 1 > %89 = insertelement <4 x i32> %88, i32 %86, i32 2 > %90 = insertelement <4 x i32> %89, i32 0, i32 3 > %91 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %90, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %92 = extractelement <4 x float> %91, i32 0 > %93 = extractelement <4 x float> %91, i32 1 > %94 = extractelement <4 x float> %91, i32 2 > %95 = fsub float %92, %81 > %96 = fsub float %93, %82 > %97 = fsub float %94, %83 > %98 = call float @llvm.fma.f32(float %29, float %95, float %81) > %99 = call float @llvm.fma.f32(float %29, float %96, float %82) > %100 = call float @llvm.fma.f32(float %29, float %97, float %83) > %101 = bitcast float %5 to i32 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %101, 10 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %98, 11 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %99, 12 > %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104, float %100, 13 > %106 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105, float 1.000000e+00, 14 > %107 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %107 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..8] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 80, 96, 112} >IMM[2] UINT32 {128, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, 
CONST[1][5], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][6], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][7], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][8], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 32 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 80) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 84) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 88) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 92) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 104) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 108) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 112) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 116) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 120) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 124) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 128) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 132) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 136) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 140) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = fmul float %16, %35 > %39 = fmul float %17, %36 > %40 = fadd float %38, %39 > %41 = fmul float %18, %37 > %42 = fadd float %40, %41 > %43 = fadd float %42, %19 > %44 = fmul float %20, %35 > %45 = fmul float %21, %36 > %46 = fadd float %44, %45 > %47 = fmul float %22, %37 > %48 = fadd float %46, %47 > %49 = fadd float %48, %23 > %50 = fmul float %24, %35 > %51 = fmul float %25, %36 > %52 = fadd float %50, %51 > %53 = fmul float %26, %37 > %54 = fadd float %52, %53 > %55 = fadd float %54, %27 > %56 = fmul float %28, %35 > %57 = fmul float %29, %36 > %58 = fadd float %56, %57 > %59 = fmul float %30, %37 > %60 = fadd float %58, %59 > %61 = fadd float %60, %31 > %62 = bitcast i32 %11 to float > %63 = insertvalue <{ float, float, float }> undef, float %62, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %49, float %55, float %61) > ret <{ float, float, float }> %63 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = 
!{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL OUT[0], COLOR >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} > 0: MOV OUT[0], IMM[0].xxxx > 1: END >radeonsi: Compiling shader 33 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = bitcast float %5 to i32 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %23, 10 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %24, float 1.000000e+00, 11 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %25, float 1.000000e+00, 12 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %26, float 1.000000e+00, 13 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %27, float 1.000000e+00, 14 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29 >} > >attributes #0 = { "InitialPSInputAddr"="36983" } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL OUT[8], GENERIC[7] >DCL CONST[1][0..6] >DCL TEMP[0..10], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} >IMM[2] FLT32 {-158456325028528675187087900672.0000, 0.0000, -7.0000, -6.0000} >IMM[3] FLT32 { -5.0000, 0.0000, -4.0000, 158456325028528675187087900672.0000} >IMM[4] FLT32 { -3.0000, 0.0000, -2.0000, 6.0000} >IMM[5] FLT32 {-158456325028528675187087900672.0000, 0.0000, 158456325028528675187087900672.0000, 6.0000} >IMM[6] FLT32 { -1.0000, 0.0000, 1.0000, 6.0000} >IMM[7] FLT32 { 2.0000, 0.0000, 3.0000, 6.0000} >IMM[8] FLT32 { 4.0000, 0.0000, 5.0000, 6.0000} 
>IMM[9] FLT32 { 6.0000, 0.0000, 7.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 3: FSEQ TEMP[2], CONST[1][6].xyxy, IMM[0].xxxx > 4: RCP TEMP[3].xz, CONST[1][6].xxxx > 5: RCP TEMP[3].yw, CONST[1][6].yyyy > 6: MUL TEMP[3], IMM[2].zywy, TEMP[3] > 7: UCMP TEMP[2], TEMP[2], IMM[2].xyxy, TEMP[3] > 8: ADD TEMP[2], TEMP[2], TEMP[1].xyxy > 9: FSEQ TEMP[3], CONST[1][6].xyxy, IMM[0].xxxx > 10: RCP TEMP[4].xz, CONST[1][6].xxxx > 11: RCP TEMP[4].yw, CONST[1][6].yyyy > 12: MUL TEMP[4], IMM[3].xyzy, TEMP[4] > 13: UCMP TEMP[3], TEMP[3], IMM[2].xyxy, TEMP[4] > 14: ADD TEMP[3], TEMP[3], TEMP[1].xyxy > 15: FSEQ TEMP[4], CONST[1][6].xyxy, IMM[0].xxxx > 16: RCP TEMP[5].xz, CONST[1][6].xxxx > 17: RCP TEMP[5].yw, CONST[1][6].yyyy > 18: MUL TEMP[5], IMM[4].xyzy, TEMP[5] > 19: UCMP TEMP[4], TEMP[4], IMM[2].xyxy, TEMP[5] > 20: ADD TEMP[4], TEMP[4], TEMP[1].xyxy > 21: FMA TEMP[5].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 22: MOV TEMP[5].zw, TEMP[5].yyxy > 23: FSEQ TEMP[6], CONST[1][6].xyxy, IMM[0].xxxx > 24: RCP TEMP[7].xz, CONST[1][6].xxxx > 25: RCP TEMP[7].yw, CONST[1][6].yyyy > 26: MUL TEMP[7], IMM[6].xyzy, TEMP[7] > 27: UCMP TEMP[6], TEMP[6], IMM[5].xyzy, TEMP[7] > 28: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[1].xyyy > 29: ADD TEMP[6].xy, TEMP[6].zwww, TEMP[1].xyyy > 30: FSEQ TEMP[7], CONST[1][6].xyxy, IMM[0].xxxx > 31: RCP TEMP[8].xz, CONST[1][6].xxxx > 32: RCP TEMP[8].yw, CONST[1][6].yyyy > 33: MUL TEMP[8], IMM[7].xyzy, TEMP[8] > 34: UCMP TEMP[7], TEMP[7], IMM[3].wywy, TEMP[8] > 35: ADD TEMP[8].xy, TEMP[7].xyyy, TEMP[1].xyyy > 36: MOV TEMP[6].zw, TEMP[8].yyxy > 37: ADD TEMP[7].xy, TEMP[7].zwww, TEMP[1].xyyy > 38: FSEQ TEMP[8], CONST[1][6].xyxy, IMM[0].xxxx > 39: RCP TEMP[9].xz, CONST[1][6].xxxx > 40: RCP TEMP[9].yw, CONST[1][6].yyyy > 41: MUL TEMP[9], IMM[8].xyzy, TEMP[9] > 42: UCMP TEMP[8], TEMP[8], IMM[3].wywy, TEMP[9] > 43: ADD TEMP[9].xy, TEMP[8].xyyy, TEMP[1].xyyy > 44: MOV TEMP[7].zw, TEMP[9].yyxy > 45: ADD TEMP[8].xy, TEMP[8].zwww, TEMP[1].xyyy > 46: FSEQ TEMP[9], CONST[1][6].xyxy, IMM[0].xxxx > 47: RCP TEMP[10].xz, CONST[1][6].xxxx > 48: RCP TEMP[10].yw, CONST[1][6].yyyy > 49: MUL TEMP[10], IMM[9].xyzy, TEMP[10] > 50: UCMP TEMP[9], TEMP[9], IMM[3].wywy, TEMP[10] > 51: ADD TEMP[10].xy, TEMP[9].xyyy, TEMP[1].xyyy > 52: MOV TEMP[8].zw, TEMP[10].yyxy > 53: ADD TEMP[1].xy, TEMP[9].zwww, TEMP[1].xyyy > 54: MOV OUT[8], TEMP[1] > 55: MOV OUT[7], TEMP[8] > 56: MOV OUT[6], TEMP[7] > 57: MOV OUT[5], TEMP[6] > 58: MOV OUT[4], TEMP[5] > 59: MOV OUT[3], TEMP[4] > 60: MOV OUT[2], TEMP[3] > 61: MOV OUT[1], TEMP[2] > 62: MOV OUT[0], TEMP[0] > 63: END >radeonsi: Compiling shader 34 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> 
addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %24 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = fcmp oeq float %16, 0.000000e+00 > %26 = fcmp oeq float %17, 0.000000e+00 > %27 = fcmp oeq float %16, 0.000000e+00 > %28 = fcmp oeq float %17, 0.000000e+00 > %29 = fdiv float 1.000000e+00, %16 > %30 = fdiv float 1.000000e+00, %17 > %31 = fmul float %29, -7.000000e+00 > %32 = fmul float %30, 0.000000e+00 > %33 = fmul float %29, -6.000000e+00 > %34 = fmul float %30, 0.000000e+00 > %35 = select i1 %25, float 0xC600000000000000, float %31 > %36 = select i1 %26, float 0.000000e+00, float %32 > %37 = select i1 %27, float 0xC600000000000000, float %33 > %38 = select i1 %28, float 0.000000e+00, float %34 > %39 = fadd float %35, %23 > %40 = fadd float %36, %24 > %41 = fadd float %37, %23 > %42 = fadd float %38, %24 > %43 = fcmp oeq float %16, 0.000000e+00 > %44 = fcmp oeq float %17, 0.000000e+00 > %45 = fcmp oeq float %16, 0.000000e+00 > %46 = fcmp oeq float %17, 0.000000e+00 > %47 = fdiv float 1.000000e+00, %16 > %48 = fdiv float 1.000000e+00, %17 > %49 = fmul float %47, -5.000000e+00 > %50 = fmul float %48, 0.000000e+00 > %51 = fmul float %47, -4.000000e+00 > %52 = fmul float %48, 0.000000e+00 > %53 = select i1 %43, float 0xC600000000000000, float %49 > %54 = select i1 %44, float 0.000000e+00, float %50 > %55 = select i1 %45, float 0xC600000000000000, float %51 > %56 = select i1 %46, float 0.000000e+00, float %52 > %57 = fadd float %53, %23 > %58 = fadd float %54, %24 > %59 = fadd float %55, %23 > %60 = fadd float %56, %24 > %61 = fcmp oeq float %16, 0.000000e+00 > %62 = fcmp oeq float %17, 0.000000e+00 > %63 = fcmp oeq float %16, 0.000000e+00 > %64 = fcmp oeq float %17, 0.000000e+00 > %65 = fdiv float 1.000000e+00, %16 > %66 = fdiv float 1.000000e+00, %17 > %67 = fmul float %65, -3.000000e+00 > %68 = fmul float %66, 0.000000e+00 > %69 = fmul float %65, -2.000000e+00 > %70 = fmul float %66, 0.000000e+00 > %71 = select i1 %61, float 0xC600000000000000, float %67 > %72 = select i1 %62, float 0.000000e+00, float %68 > %73 = select i1 %63, float 0xC600000000000000, float %69 > %74 = select i1 %64, float 0.000000e+00, float %70 > %75 = fadd float %71, %23 > %76 = fadd float %72, %24 > %77 = fadd float %73, %23 > %78 = fadd float %74, %24 > %79 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %80 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %81 = fcmp oeq float %16, 0.000000e+00 > %82 = fcmp oeq float %17, 0.000000e+00 > %83 = fcmp oeq float %16, 0.000000e+00 > %84 = fcmp oeq float %17, 0.000000e+00 > %85 = fdiv float 1.000000e+00, %16 > %86 = fdiv float 1.000000e+00, %17 > %87 = fsub float -0.000000e+00, %85 > %88 = fmul float %86, 0.000000e+00 > %89 = fmul float %86, 0.000000e+00 > %90 = select i1 %81, float 0xC600000000000000, float %87 > %91 = select i1 %82, float 0.000000e+00, float %88 > %92 = select i1 %83, float 0x4600000000000000, float %85 > %93 = select i1 %84, float 0.000000e+00, float %89 > %94 = fadd float %90, %23 > %95 = fadd float %91, %24 > %96 = fadd float %92, %23 > %97 = fadd float %93, %24 > %98 = fcmp oeq float %16, 0.000000e+00 > %99 = fcmp oeq float %17, 0.000000e+00 > %100 = fcmp oeq float %16, 0.000000e+00 > %101 = fcmp oeq float 
%17, 0.000000e+00 > %102 = fdiv float 1.000000e+00, %16 > %103 = fdiv float 1.000000e+00, %17 > %104 = fmul float %102, 2.000000e+00 > %105 = fmul float %103, 0.000000e+00 > %106 = fmul float %102, 3.000000e+00 > %107 = fmul float %103, 0.000000e+00 > %108 = select i1 %98, float 0x4600000000000000, float %104 > %109 = select i1 %99, float 0.000000e+00, float %105 > %110 = select i1 %100, float 0x4600000000000000, float %106 > %111 = select i1 %101, float 0.000000e+00, float %107 > %112 = fadd float %108, %23 > %113 = fadd float %109, %24 > %114 = fadd float %110, %23 > %115 = fadd float %111, %24 > %116 = fcmp oeq float %16, 0.000000e+00 > %117 = fcmp oeq float %17, 0.000000e+00 > %118 = fcmp oeq float %16, 0.000000e+00 > %119 = fcmp oeq float %17, 0.000000e+00 > %120 = fdiv float 1.000000e+00, %16 > %121 = fdiv float 1.000000e+00, %17 > %122 = fmul float %120, 4.000000e+00 > %123 = fmul float %121, 0.000000e+00 > %124 = fmul float %120, 5.000000e+00 > %125 = fmul float %121, 0.000000e+00 > %126 = select i1 %116, float 0x4600000000000000, float %122 > %127 = select i1 %117, float 0.000000e+00, float %123 > %128 = select i1 %118, float 0x4600000000000000, float %124 > %129 = select i1 %119, float 0.000000e+00, float %125 > %130 = fadd float %126, %23 > %131 = fadd float %127, %24 > %132 = fadd float %128, %23 > %133 = fadd float %129, %24 > %134 = fcmp oeq float %16, 0.000000e+00 > %135 = fcmp oeq float %17, 0.000000e+00 > %136 = fcmp oeq float %16, 0.000000e+00 > %137 = fcmp oeq float %17, 0.000000e+00 > %138 = fdiv float 1.000000e+00, %16 > %139 = fdiv float 1.000000e+00, %17 > %140 = fmul float %138, 6.000000e+00 > %141 = fmul float %139, 0.000000e+00 > %142 = fmul float %138, 7.000000e+00 > %143 = fmul float %139, 0.000000e+00 > %144 = select i1 %134, float 0x4600000000000000, float %140 > %145 = select i1 %135, float 0.000000e+00, float %141 > %146 = select i1 %136, float 0x4600000000000000, float %142 > %147 = select i1 %137, float 0.000000e+00, float %143 > %148 = fadd float %144, %23 > %149 = fadd float %145, %24 > %150 = fadd float %146, %23 > %151 = fadd float %147, %24 > %152 = bitcast i32 %11 to float > %153 = insertvalue <{ float, float, float }> undef, float %152, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %57, float %58, float %59, float %60) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %75, float %76, float %77, float %78) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %94, float %95, float %79, float %80) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %96, float %97, float %112, float %113) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %114, float %115, float %130, float %131) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %132, float %133, float %148, float %149) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %150, float %151, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %153 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, 
float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL IN[7], GENERIC[7], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 112, 0, 0} >IMM[1] FLT32 { 0.0271, 0.0159, 0.0424, 0.0613} >IMM[2] FLT32 { 0.0815, 0.1000, 0.1130, 0.1177} >IMM[3] FLT32 { 255.0000, 1.0000, 0.0039, 0.0000} >IMM[4] FLT32 {158456325028528675187087900672.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][7].xxxx > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xxxx > 5: MOV TEMP[1].xy, IN[0].xyyy > 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 7: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1].xyzz > 8: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 9: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].yyyy, TEMP[0].xyzz > 10: MOV TEMP[2].xy, IN[1].xyyy > 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 12: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 13: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 14: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].zzzz, TEMP[0].xyzz > 15: MOV TEMP[2].xy, IN[1].zwww > 16: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 17: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 19: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].wwww, TEMP[0].xyzz > 20: MOV TEMP[2].xy, IN[2].xyyy > 21: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 22: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 23: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 24: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].xxxx, TEMP[0].xyzz > 25: MOV TEMP[2].xy, IN[2].zwww > 26: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 27: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 28: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 29: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].yyyy, TEMP[0].xyzz > 30: MOV TEMP[2].xy, IN[3].xyyy > 31: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 32: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 33: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 34: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].zzzz, TEMP[0].xyzz > 35: MOV TEMP[2].xy, IN[3].zwww > 36: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 37: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 38: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 39: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].wwww, TEMP[0].xyzz > 40: MOV TEMP[2].xy, IN[4].xyyy > 41: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 42: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 43: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 44: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].zzzz, TEMP[0].xyzz > 45: MOV TEMP[2].xy, IN[4].zwww > 46: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 47: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 48: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 49: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].yyyy, TEMP[0].xyzz > 50: MOV TEMP[2].xy, IN[5].xyyy > 51: TEX TEMP[2], TEMP[2], 
SAMP[0], 2D > 52: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 53: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 54: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[2].xxxx, TEMP[0].xyzz > 55: MOV TEMP[2].xy, IN[5].zwww > 56: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 57: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 58: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 59: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].wwww, TEMP[0].xyzz > 60: MOV TEMP[2].xy, IN[6].xyyy > 61: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 62: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 63: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 64: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].zzzz, TEMP[0].xyzz > 65: MOV TEMP[2].xy, IN[6].zwww > 66: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 67: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 68: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 69: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[0].xyzz > 70: MOV TEMP[2].xy, IN[7].xyyy > 71: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 72: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 73: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 74: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].yyyy, TEMP[0].xyzz > 75: MAX TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx > 76: MAX TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx > 77: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][7].yyyy > 78: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 79: CEIL TEMP[2].x, TEMP[2].xxxx > 80: MAX TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy > 81: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].zzzz > 82: MUL TEMP[1].x, TEMP[2].xxxx, CONST[1][7].xxxx > 83: MOV TEMP[2].w, TEMP[2].xxxx > 84: FSEQ TEMP[3].xyz, TEMP[1].xxxx, IMM[3].wwww > 85: SSG TEMP[4].xyz, TEMP[0].xyzz > 86: MUL TEMP[4].xyz, IMM[4].xxxx, TEMP[4].xyzz > 87: RCP TEMP[1].xyz, TEMP[1].xxxx > 88: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 89: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz > 90: MOV OUT[0], TEMP[2] > 91: END >radeonsi: Compiling shader 35 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > 
%38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %6, <2 x i32> %8) > %66 = bitcast float %38 to i32 > %67 = bitcast float %39 to i32 > %68 = insertelement <2 x i32> undef, i32 %66, i32 0 > %69 = insertelement <2 x i32> %68, i32 %67, i32 1 > %70 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %69, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = fmul float %74, %71 > %76 = fmul float %74, %72 > %77 = fmul float %74, %73 > %78 = fmul float %75, %25 > %79 = fmul float %76, %25 > %80 = fmul float %77, %25 > %81 = fmul float %78, 0x3F9BBA4AE0000000 > %82 = fmul float %79, 0x3F9BBA4AE0000000 > %83 = fmul float %80, 0x3F9BBA4AE0000000 > %84 = bitcast float %36 to i32 > %85 = bitcast float %37 to i32 > %86 = insertelement <2 x i32> undef, i32 %84, i32 0 > %87 = insertelement <2 x i32> %86, i32 %85, i32 1 > %88 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %87, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %89 = extractelement <4 x float> %88, i32 0 > %90 = extractelement <4 x float> %88, i32 1 > %91 = extractelement <4 x float> %88, i32 2 > %92 = extractelement <4 x float> %88, i32 3 > %93 = fmul float %92, %89 > %94 = fmul float %92, %90 > %95 = fmul float %92, %91 > %96 = fmul float %93, %25 > %97 = fmul float %94, %25 > %98 = fmul float %95, 
%25 > %99 = call float @llvm.fma.f32(float %96, float 0x3F904F8860000000, float %81) > %100 = call float @llvm.fma.f32(float %97, float 0x3F904F8860000000, float %82) > %101 = call float @llvm.fma.f32(float %98, float 0x3F904F8860000000, float %83) > %102 = bitcast float %40 to i32 > %103 = bitcast float %41 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = extractelement <4 x float> %106, i32 3 > %111 = fmul float %110, %107 > %112 = fmul float %110, %108 > %113 = fmul float %110, %109 > %114 = fmul float %111, %25 > %115 = fmul float %112, %25 > %116 = fmul float %113, %25 > %117 = call float @llvm.fma.f32(float %114, float 0x3FA5B87E00000000, float %99) > %118 = call float @llvm.fma.f32(float %115, float 0x3FA5B87E00000000, float %100) > %119 = call float @llvm.fma.f32(float %116, float 0x3FA5B87E00000000, float %101) > %120 = bitcast float %42 to i32 > %121 = bitcast float %43 to i32 > %122 = insertelement <2 x i32> undef, i32 %120, i32 0 > %123 = insertelement <2 x i32> %122, i32 %121, i32 1 > %124 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %123, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %125 = extractelement <4 x float> %124, i32 0 > %126 = extractelement <4 x float> %124, i32 1 > %127 = extractelement <4 x float> %124, i32 2 > %128 = extractelement <4 x float> %124, i32 3 > %129 = fmul float %128, %125 > %130 = fmul float %128, %126 > %131 = fmul float %128, %127 > %132 = fmul float %129, %25 > %133 = fmul float %130, %25 > %134 = fmul float %131, %25 > %135 = call float @llvm.fma.f32(float %132, float 0x3FAF5CC9C0000000, float %117) > %136 = call float @llvm.fma.f32(float %133, float 0x3FAF5CC9C0000000, float %118) > %137 = call float @llvm.fma.f32(float %134, float 0x3FAF5CC9C0000000, float %119) > %138 = bitcast float %44 to i32 > %139 = bitcast float %45 to i32 > %140 = insertelement <2 x i32> undef, i32 %138, i32 0 > %141 = insertelement <2 x i32> %140, i32 %139, i32 1 > %142 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %141, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %143 = extractelement <4 x float> %142, i32 0 > %144 = extractelement <4 x float> %142, i32 1 > %145 = extractelement <4 x float> %142, i32 2 > %146 = extractelement <4 x float> %142, i32 3 > %147 = fmul float %146, %143 > %148 = fmul float %146, %144 > %149 = fmul float %146, %145 > %150 = fmul float %147, %25 > %151 = fmul float %148, %25 > %152 = fmul float %149, %25 > %153 = call float @llvm.fma.f32(float %150, float 0x3FB4DE00C0000000, float %135) > %154 = call float @llvm.fma.f32(float %151, float 0x3FB4DE00C0000000, float %136) > %155 = call float @llvm.fma.f32(float %152, float 0x3FB4DE00C0000000, float %137) > %156 = bitcast float %46 to i32 > %157 = bitcast float %47 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> %160, i32 0 > %162 = extractelement <4 x 
float> %160, i32 1 > %163 = extractelement <4 x float> %160, i32 2 > %164 = extractelement <4 x float> %160, i32 3 > %165 = fmul float %164, %161 > %166 = fmul float %164, %162 > %167 = fmul float %164, %163 > %168 = fmul float %165, %25 > %169 = fmul float %166, %25 > %170 = fmul float %167, %25 > %171 = call float @llvm.fma.f32(float %168, float 0x3FB9976C60000000, float %153) > %172 = call float @llvm.fma.f32(float %169, float 0x3FB9976C60000000, float %154) > %173 = call float @llvm.fma.f32(float %170, float 0x3FB9976C60000000, float %155) > %174 = bitcast float %48 to i32 > %175 = bitcast float %49 to i32 > %176 = insertelement <2 x i32> undef, i32 %174, i32 0 > %177 = insertelement <2 x i32> %176, i32 %175, i32 1 > %178 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %177, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %179 = extractelement <4 x float> %178, i32 0 > %180 = extractelement <4 x float> %178, i32 1 > %181 = extractelement <4 x float> %178, i32 2 > %182 = extractelement <4 x float> %178, i32 3 > %183 = fmul float %182, %179 > %184 = fmul float %182, %180 > %185 = fmul float %182, %181 > %186 = fmul float %183, %25 > %187 = fmul float %184, %25 > %188 = fmul float %185, %25 > %189 = call float @llvm.fma.f32(float %186, float 0x3FBCECD240000000, float %171) > %190 = call float @llvm.fma.f32(float %187, float 0x3FBCECD240000000, float %172) > %191 = call float @llvm.fma.f32(float %188, float 0x3FBCECD240000000, float %173) > %192 = bitcast float %50 to i32 > %193 = bitcast float %51 to i32 > %194 = insertelement <2 x i32> undef, i32 %192, i32 0 > %195 = insertelement <2 x i32> %194, i32 %193, i32 1 > %196 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %195, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %197 = extractelement <4 x float> %196, i32 0 > %198 = extractelement <4 x float> %196, i32 1 > %199 = extractelement <4 x float> %196, i32 2 > %200 = extractelement <4 x float> %196, i32 3 > %201 = fmul float %200, %197 > %202 = fmul float %200, %198 > %203 = fmul float %200, %199 > %204 = fmul float %201, %25 > %205 = fmul float %202, %25 > %206 = fmul float %203, %25 > %207 = call float @llvm.fma.f32(float %204, float 0x3FBE214FC0000000, float %189) > %208 = call float @llvm.fma.f32(float %205, float 0x3FBE214FC0000000, float %190) > %209 = call float @llvm.fma.f32(float %206, float 0x3FBE214FC0000000, float %191) > %210 = bitcast float %52 to i32 > %211 = bitcast float %53 to i32 > %212 = insertelement <2 x i32> undef, i32 %210, i32 0 > %213 = insertelement <2 x i32> %212, i32 %211, i32 1 > %214 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %213, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %215 = extractelement <4 x float> %214, i32 0 > %216 = extractelement <4 x float> %214, i32 1 > %217 = extractelement <4 x float> %214, i32 2 > %218 = extractelement <4 x float> %214, i32 3 > %219 = fmul float %218, %215 > %220 = fmul float %218, %216 > %221 = fmul float %218, %217 > %222 = fmul float %219, %25 > %223 = fmul float %220, %25 > %224 = fmul float %221, %25 > %225 = call float @llvm.fma.f32(float %222, float 0x3FBCECD240000000, float %207) > %226 = call float @llvm.fma.f32(float %223, float 0x3FBCECD240000000, float %208) > %227 = call float @llvm.fma.f32(float %224, float 0x3FBCECD240000000, float %209) > %228 = bitcast float %54 to i32 > %229 = bitcast float %55 to i32 > %230 = insertelement <2 x i32> undef, i32 
%228, i32 0 > %231 = insertelement <2 x i32> %230, i32 %229, i32 1 > %232 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %231, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %233 = extractelement <4 x float> %232, i32 0 > %234 = extractelement <4 x float> %232, i32 1 > %235 = extractelement <4 x float> %232, i32 2 > %236 = extractelement <4 x float> %232, i32 3 > %237 = fmul float %236, %233 > %238 = fmul float %236, %234 > %239 = fmul float %236, %235 > %240 = fmul float %237, %25 > %241 = fmul float %238, %25 > %242 = fmul float %239, %25 > %243 = call float @llvm.fma.f32(float %240, float 0x3FB9976C60000000, float %225) > %244 = call float @llvm.fma.f32(float %241, float 0x3FB9976C60000000, float %226) > %245 = call float @llvm.fma.f32(float %242, float 0x3FB9976C60000000, float %227) > %246 = bitcast float %56 to i32 > %247 = bitcast float %57 to i32 > %248 = insertelement <2 x i32> undef, i32 %246, i32 0 > %249 = insertelement <2 x i32> %248, i32 %247, i32 1 > %250 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %249, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %251 = extractelement <4 x float> %250, i32 0 > %252 = extractelement <4 x float> %250, i32 1 > %253 = extractelement <4 x float> %250, i32 2 > %254 = extractelement <4 x float> %250, i32 3 > %255 = fmul float %254, %251 > %256 = fmul float %254, %252 > %257 = fmul float %254, %253 > %258 = fmul float %255, %25 > %259 = fmul float %256, %25 > %260 = fmul float %257, %25 > %261 = call float @llvm.fma.f32(float %258, float 0x3FB4DE00C0000000, float %243) > %262 = call float @llvm.fma.f32(float %259, float 0x3FB4DE00C0000000, float %244) > %263 = call float @llvm.fma.f32(float %260, float 0x3FB4DE00C0000000, float %245) > %264 = bitcast float %58 to i32 > %265 = bitcast float %59 to i32 > %266 = insertelement <2 x i32> undef, i32 %264, i32 0 > %267 = insertelement <2 x i32> %266, i32 %265, i32 1 > %268 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %267, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %269 = extractelement <4 x float> %268, i32 0 > %270 = extractelement <4 x float> %268, i32 1 > %271 = extractelement <4 x float> %268, i32 2 > %272 = extractelement <4 x float> %268, i32 3 > %273 = fmul float %272, %269 > %274 = fmul float %272, %270 > %275 = fmul float %272, %271 > %276 = fmul float %273, %25 > %277 = fmul float %274, %25 > %278 = fmul float %275, %25 > %279 = call float @llvm.fma.f32(float %276, float 0x3FAF5CC9C0000000, float %261) > %280 = call float @llvm.fma.f32(float %277, float 0x3FAF5CC9C0000000, float %262) > %281 = call float @llvm.fma.f32(float %278, float 0x3FAF5CC9C0000000, float %263) > %282 = bitcast float %60 to i32 > %283 = bitcast float %61 to i32 > %284 = insertelement <2 x i32> undef, i32 %282, i32 0 > %285 = insertelement <2 x i32> %284, i32 %283, i32 1 > %286 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %285, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %287 = extractelement <4 x float> %286, i32 0 > %288 = extractelement <4 x float> %286, i32 1 > %289 = extractelement <4 x float> %286, i32 2 > %290 = extractelement <4 x float> %286, i32 3 > %291 = fmul float %290, %287 > %292 = fmul float %290, %288 > %293 = fmul float %290, %289 > %294 = fmul float %291, %25 > %295 = fmul float %292, %25 > %296 = fmul float %293, %25 > %297 = call float @llvm.fma.f32(float %294, float 
0x3FA5B87E00000000, float %279) > %298 = call float @llvm.fma.f32(float %295, float 0x3FA5B87E00000000, float %280) > %299 = call float @llvm.fma.f32(float %296, float 0x3FA5B87E00000000, float %281) > %300 = bitcast float %62 to i32 > %301 = bitcast float %63 to i32 > %302 = insertelement <2 x i32> undef, i32 %300, i32 0 > %303 = insertelement <2 x i32> %302, i32 %301, i32 1 > %304 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %303, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %305 = extractelement <4 x float> %304, i32 0 > %306 = extractelement <4 x float> %304, i32 1 > %307 = extractelement <4 x float> %304, i32 2 > %308 = extractelement <4 x float> %304, i32 3 > %309 = fmul float %308, %305 > %310 = fmul float %308, %306 > %311 = fmul float %308, %307 > %312 = fmul float %309, %25 > %313 = fmul float %310, %25 > %314 = fmul float %311, %25 > %315 = call float @llvm.fma.f32(float %312, float 0x3F9BBA4AE0000000, float %297) > %316 = call float @llvm.fma.f32(float %313, float 0x3F9BBA4AE0000000, float %298) > %317 = call float @llvm.fma.f32(float %314, float 0x3F9BBA4AE0000000, float %299) > %318 = bitcast float %64 to i32 > %319 = bitcast float %65 to i32 > %320 = insertelement <2 x i32> undef, i32 %318, i32 0 > %321 = insertelement <2 x i32> %320, i32 %319, i32 1 > %322 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %321, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %323 = extractelement <4 x float> %322, i32 0 > %324 = extractelement <4 x float> %322, i32 1 > %325 = extractelement <4 x float> %322, i32 2 > %326 = extractelement <4 x float> %322, i32 3 > %327 = fmul float %326, %323 > %328 = fmul float %326, %324 > %329 = fmul float %326, %325 > %330 = fmul float %327, %25 > %331 = fmul float %328, %25 > %332 = fmul float %329, %25 > %333 = call float @llvm.fma.f32(float %330, float 0x3F904F8860000000, float %315) > %334 = call float @llvm.fma.f32(float %331, float 0x3F904F8860000000, float %316) > %335 = call float @llvm.fma.f32(float %332, float 0x3F904F8860000000, float %317) > %336 = call float @llvm.maxnum.f32(float %334, float %333) > %337 = call float @llvm.maxnum.f32(float %335, float %336) > %338 = fmul float %337, %26 > %339 = fmul float %338, 2.550000e+02 > %340 = call float @llvm.ceil.f32(float %339) > %341 = call float @llvm.maxnum.f32(float %340, float 1.000000e+00) > %342 = fmul float %341, 0x3F70101020000000 > %343 = fmul float %342, %25 > %344 = fcmp oeq float %343, 0.000000e+00 > %345 = fcmp oeq float %343, 0.000000e+00 > %346 = fcmp oeq float %343, 0.000000e+00 > %347 = fcmp ogt float %333, 0.000000e+00 > %348 = select i1 %347, float 1.000000e+00, float %333 > %349 = fcmp oge float %348, 0.000000e+00 > %350 = fcmp ogt float %334, 0.000000e+00 > %351 = select i1 %350, float 1.000000e+00, float %334 > %352 = fcmp oge float %351, 0.000000e+00 > %353 = fcmp ogt float %335, 0.000000e+00 > %354 = select i1 %353, float 1.000000e+00, float %335 > %355 = fcmp oge float %354, 0.000000e+00 > %.op = fmul float %348, 0x4600000000000000 > %356 = select i1 %349, float %.op, float 0xC600000000000000 > %.op20 = fmul float %351, 0x4600000000000000 > %357 = select i1 %352, float %.op20, float 0xC600000000000000 > %.op21 = fmul float %354, 0x4600000000000000 > %358 = select i1 %355, float %.op21, float 0xC600000000000000 > %359 = fdiv float 1.000000e+00, %343 > %360 = fmul float %333, %359 > %361 = fmul float %334, %359 > %362 = fmul float %335, %359 > %363 = select i1 %344, float 
%356, float %360 > %364 = select i1 %345, float %357, float %361 > %365 = select i1 %346, float %358, float %362 > %366 = bitcast float %5 to i32 > %367 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %366, 10 > %368 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %367, float %363, 11 > %369 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %368, float %364, 12 > %370 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %369, float %365, 13 > %371 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %370, float %342, 14 > %372 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %371, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %372 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL OUT[8], GENERIC[7] >DCL CONST[1][0..6] >DCL TEMP[0..10], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} >IMM[2] FLT32 { 0.0000, -158456325028528675187087900672.0000, -7.0000, -6.0000} >IMM[3] FLT32 { 0.0000, -5.0000, -4.0000, 158456325028528675187087900672.0000} >IMM[4] FLT32 { 0.0000, -3.0000, -2.0000, 6.0000} >IMM[5] FLT32 {-158456325028528675187087900672.0000, 0.0000, 158456325028528675187087900672.0000, 6.0000} >IMM[6] FLT32 { -1.0000, 0.0000, 1.0000, 6.0000} >IMM[7] FLT32 { 0.0000, 2.0000, 3.0000, 6.0000} >IMM[8] FLT32 { 0.0000, 4.0000, 5.0000, 6.0000} >IMM[9] FLT32 { 0.0000, 6.0000, 7.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 3: FSEQ TEMP[2], CONST[1][6].xyxy, IMM[0].xxxx > 4: RCP TEMP[3].xz, CONST[1][6].xxxx > 5: RCP TEMP[3].yw, CONST[1][6].yyyy > 6: MUL TEMP[3], IMM[2].xzxw, 
TEMP[3] > 7: UCMP TEMP[2], TEMP[2], IMM[2].xyxy, TEMP[3] > 8: ADD TEMP[2], TEMP[2], TEMP[1].xyxy > 9: FSEQ TEMP[3], CONST[1][6].xyxy, IMM[0].xxxx > 10: RCP TEMP[4].xz, CONST[1][6].xxxx > 11: RCP TEMP[4].yw, CONST[1][6].yyyy > 12: MUL TEMP[4], IMM[3].xyxz, TEMP[4] > 13: UCMP TEMP[3], TEMP[3], IMM[2].xyxy, TEMP[4] > 14: ADD TEMP[3], TEMP[3], TEMP[1].xyxy > 15: FSEQ TEMP[4], CONST[1][6].xyxy, IMM[0].xxxx > 16: RCP TEMP[5].xz, CONST[1][6].xxxx > 17: RCP TEMP[5].yw, CONST[1][6].yyyy > 18: MUL TEMP[5], IMM[4].xyxz, TEMP[5] > 19: UCMP TEMP[4], TEMP[4], IMM[2].xyxy, TEMP[5] > 20: ADD TEMP[4], TEMP[4], TEMP[1].xyxy > 21: FMA TEMP[5].xyz, IN[0].xxyy, IMM[0].zzww, IMM[0].zzzz > 22: MOV TEMP[5].xzw, TEMP[5].xxyz > 23: FSEQ TEMP[6].xyz, CONST[1][6].yxyy, IMM[0].xxxx > 24: RCP TEMP[7].xz, CONST[1][6].yyyy > 25: RCP TEMP[7].y, CONST[1][6].xxxx > 26: MUL TEMP[7].xyz, IMM[6].xyzz, TEMP[7].xyzz > 27: UCMP TEMP[6].xyz, TEMP[6].xyzz, IMM[5].xyzz, TEMP[7].xyzz > 28: ADD TEMP[7].x, TEMP[6].xxxx, TEMP[1].yyyy > 29: MOV TEMP[5].y, TEMP[7].xxxx > 30: ADD TEMP[6].xy, TEMP[6].yzzz, TEMP[1].xyyy > 31: FSEQ TEMP[7], CONST[1][6].xyxy, IMM[0].xxxx > 32: RCP TEMP[8].xz, CONST[1][6].xxxx > 33: RCP TEMP[8].yw, CONST[1][6].yyyy > 34: MUL TEMP[8], IMM[7].xyxz, TEMP[8] > 35: UCMP TEMP[7], TEMP[7], IMM[3].xwxw, TEMP[8] > 36: ADD TEMP[8].xy, TEMP[7].xyyy, TEMP[1].xyyy > 37: MOV TEMP[6].zw, TEMP[8].yyxy > 38: ADD TEMP[7].xy, TEMP[7].zwww, TEMP[1].xyyy > 39: FSEQ TEMP[8], CONST[1][6].xyxy, IMM[0].xxxx > 40: RCP TEMP[9].xz, CONST[1][6].xxxx > 41: RCP TEMP[9].yw, CONST[1][6].yyyy > 42: MUL TEMP[9], IMM[8].xyxz, TEMP[9] > 43: UCMP TEMP[8], TEMP[8], IMM[3].xwxw, TEMP[9] > 44: ADD TEMP[9].xy, TEMP[8].xyyy, TEMP[1].xyyy > 45: MOV TEMP[7].zw, TEMP[9].yyxy > 46: ADD TEMP[8].xy, TEMP[8].zwww, TEMP[1].xyyy > 47: FSEQ TEMP[9], CONST[1][6].xyxy, IMM[0].xxxx > 48: RCP TEMP[10].xz, CONST[1][6].xxxx > 49: RCP TEMP[10].yw, CONST[1][6].yyyy > 50: MUL TEMP[10], IMM[9].xyxz, TEMP[10] > 51: UCMP TEMP[9], TEMP[9], IMM[3].xwxw, TEMP[10] > 52: ADD TEMP[10].xy, TEMP[9].xyyy, TEMP[1].xyyy > 53: MOV TEMP[8].zw, TEMP[10].yyxy > 54: ADD TEMP[1].xy, TEMP[9].zwww, TEMP[1].xyyy > 55: MOV OUT[8], TEMP[1] > 56: MOV OUT[7], TEMP[8] > 57: MOV OUT[6], TEMP[7] > 58: MOV OUT[5], TEMP[6] > 59: MOV OUT[4], TEMP[5] > 60: MOV OUT[3], TEMP[4] > 61: MOV OUT[2], TEMP[3] > 62: MOV OUT[1], TEMP[2] > 63: MOV OUT[0], TEMP[0] > 64: END >radeonsi: Compiling shader 36 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, 
float 5.000000e-01) > %24 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = fcmp oeq float %16, 0.000000e+00 > %26 = fcmp oeq float %17, 0.000000e+00 > %27 = fcmp oeq float %16, 0.000000e+00 > %28 = fcmp oeq float %17, 0.000000e+00 > %29 = fdiv float 1.000000e+00, %16 > %30 = fdiv float 1.000000e+00, %17 > %31 = fmul float %29, 0.000000e+00 > %32 = fmul float %30, -7.000000e+00 > %33 = fmul float %29, 0.000000e+00 > %34 = fmul float %30, -6.000000e+00 > %35 = select i1 %25, float 0.000000e+00, float %31 > %36 = select i1 %26, float 0xC600000000000000, float %32 > %37 = select i1 %27, float 0.000000e+00, float %33 > %38 = select i1 %28, float 0xC600000000000000, float %34 > %39 = fadd float %35, %23 > %40 = fadd float %36, %24 > %41 = fadd float %37, %23 > %42 = fadd float %38, %24 > %43 = fcmp oeq float %16, 0.000000e+00 > %44 = fcmp oeq float %17, 0.000000e+00 > %45 = fcmp oeq float %16, 0.000000e+00 > %46 = fcmp oeq float %17, 0.000000e+00 > %47 = fdiv float 1.000000e+00, %16 > %48 = fdiv float 1.000000e+00, %17 > %49 = fmul float %47, 0.000000e+00 > %50 = fmul float %48, -5.000000e+00 > %51 = fmul float %47, 0.000000e+00 > %52 = fmul float %48, -4.000000e+00 > %53 = select i1 %43, float 0.000000e+00, float %49 > %54 = select i1 %44, float 0xC600000000000000, float %50 > %55 = select i1 %45, float 0.000000e+00, float %51 > %56 = select i1 %46, float 0xC600000000000000, float %52 > %57 = fadd float %53, %23 > %58 = fadd float %54, %24 > %59 = fadd float %55, %23 > %60 = fadd float %56, %24 > %61 = fcmp oeq float %16, 0.000000e+00 > %62 = fcmp oeq float %17, 0.000000e+00 > %63 = fcmp oeq float %16, 0.000000e+00 > %64 = fcmp oeq float %17, 0.000000e+00 > %65 = fdiv float 1.000000e+00, %16 > %66 = fdiv float 1.000000e+00, %17 > %67 = fmul float %65, 0.000000e+00 > %68 = fmul float %66, -3.000000e+00 > %69 = fmul float %65, 0.000000e+00 > %70 = fmul float %66, -2.000000e+00 > %71 = select i1 %61, float 0.000000e+00, float %67 > %72 = select i1 %62, float 0xC600000000000000, float %68 > %73 = select i1 %63, float 0.000000e+00, float %69 > %74 = select i1 %64, float 0xC600000000000000, float %70 > %75 = fadd float %71, %23 > %76 = fadd float %72, %24 > %77 = fadd float %73, %23 > %78 = fadd float %74, %24 > %79 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %80 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %81 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %82 = fcmp oeq float %17, 0.000000e+00 > %83 = fcmp oeq float %16, 0.000000e+00 > %84 = fcmp oeq float %17, 0.000000e+00 > %85 = fdiv float 1.000000e+00, %17 > %86 = fdiv float 1.000000e+00, %16 > %87 = fsub float -0.000000e+00, %85 > %88 = fmul float %86, 0.000000e+00 > %89 = select i1 %82, float 0xC600000000000000, float %87 > %90 = select i1 %83, float 0.000000e+00, float %88 > %91 = select i1 %84, float 0x4600000000000000, float %85 > %92 = fadd float %89, %24 > %93 = fadd float %90, %23 > %94 = fadd float %91, %24 > %95 = fcmp oeq float %16, 0.000000e+00 > %96 = fcmp oeq float %17, 0.000000e+00 > %97 = fcmp oeq float %16, 0.000000e+00 > %98 = fcmp oeq float %17, 0.000000e+00 > %99 = fdiv float 1.000000e+00, %16 > %100 = fdiv float 1.000000e+00, %17 > %101 = fmul float %99, 0.000000e+00 > %102 = fmul float %100, 2.000000e+00 > %103 = fmul float %99, 0.000000e+00 > %104 = fmul float %100, 3.000000e+00 > %105 = select i1 %95, float 0.000000e+00, float %101 > %106 = select i1 %96, float 0x4600000000000000, 
float %102 > %107 = select i1 %97, float 0.000000e+00, float %103 > %108 = select i1 %98, float 0x4600000000000000, float %104 > %109 = fadd float %105, %23 > %110 = fadd float %106, %24 > %111 = fadd float %107, %23 > %112 = fadd float %108, %24 > %113 = fcmp oeq float %16, 0.000000e+00 > %114 = fcmp oeq float %17, 0.000000e+00 > %115 = fcmp oeq float %16, 0.000000e+00 > %116 = fcmp oeq float %17, 0.000000e+00 > %117 = fdiv float 1.000000e+00, %16 > %118 = fdiv float 1.000000e+00, %17 > %119 = fmul float %117, 0.000000e+00 > %120 = fmul float %118, 4.000000e+00 > %121 = fmul float %117, 0.000000e+00 > %122 = fmul float %118, 5.000000e+00 > %123 = select i1 %113, float 0.000000e+00, float %119 > %124 = select i1 %114, float 0x4600000000000000, float %120 > %125 = select i1 %115, float 0.000000e+00, float %121 > %126 = select i1 %116, float 0x4600000000000000, float %122 > %127 = fadd float %123, %23 > %128 = fadd float %124, %24 > %129 = fadd float %125, %23 > %130 = fadd float %126, %24 > %131 = fcmp oeq float %16, 0.000000e+00 > %132 = fcmp oeq float %17, 0.000000e+00 > %133 = fcmp oeq float %16, 0.000000e+00 > %134 = fcmp oeq float %17, 0.000000e+00 > %135 = fdiv float 1.000000e+00, %16 > %136 = fdiv float 1.000000e+00, %17 > %137 = fmul float %135, 0.000000e+00 > %138 = fmul float %136, 6.000000e+00 > %139 = fmul float %135, 0.000000e+00 > %140 = fmul float %136, 7.000000e+00 > %141 = select i1 %131, float 0.000000e+00, float %137 > %142 = select i1 %132, float 0x4600000000000000, float %138 > %143 = select i1 %133, float 0.000000e+00, float %139 > %144 = select i1 %134, float 0x4600000000000000, float %140 > %145 = fadd float %141, %23 > %146 = fadd float %142, %24 > %147 = fadd float %143, %23 > %148 = fadd float %144, %24 > %149 = bitcast i32 %11 to float > %150 = insertvalue <{ float, float, float }> undef, float %149, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %57, float %58, float %59, float %60) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %75, float %76, float %77, float %78) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %79, float %92, float %80, float %81) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %109, float %110) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %111, float %112, float %127, float %128) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %129, float %130, float %145, float %146) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %147, float %148, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %150 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], 
POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL OUT[8], GENERIC[7] >DCL CONST[1][0..6] >DCL TEMP[0..10], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} >IMM[2] FLT32 {-158456325028528675187087900672.0000, -1.5000, -0.5000, 158456325028528675187087900672.0000} >IMM[3] FLT32 { 0.5000, -1.5000, 1.5000, -0.5000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1], IN[0].xyxy, IMM[0].zwzw, IMM[0].zzzz > 3: FSEQ TEMP[2], CONST[1][6].xyxy, IMM[0].xxxx > 4: RCP TEMP[3].xz, CONST[1][6].xxxx > 5: RCP TEMP[3].yw, CONST[1][6].yyyy > 6: MUL TEMP[3], IMM[2].yyzy, TEMP[3] > 7: UCMP TEMP[2], TEMP[2], IMM[2].xxxx, TEMP[3] > 8: ADD TEMP[2], TEMP[2], TEMP[1].zwzw > 9: FSEQ TEMP[3], CONST[1][6].xyxy, IMM[0].xxxx > 10: RCP TEMP[4].xz, CONST[1][6].xxxx > 11: RCP TEMP[4].yw, CONST[1][6].yyyy > 12: MUL TEMP[4], IMM[3].xyzy, TEMP[4] > 13: UCMP TEMP[3], TEMP[3], IMM[2].wxwx, TEMP[4] > 14: ADD TEMP[3], TEMP[3], TEMP[1].zwzw > 15: FSEQ TEMP[4], CONST[1][6].xyxy, IMM[0].xxxx > 16: RCP TEMP[5].xz, CONST[1][6].xxxx > 17: RCP TEMP[5].yw, CONST[1][6].yyyy > 18: MUL TEMP[5], IMM[2].yzzz, TEMP[5] > 19: UCMP TEMP[4], TEMP[4], IMM[2].xxxx, TEMP[5] > 20: ADD TEMP[4], TEMP[4], TEMP[1].zwzw > 21: FSEQ TEMP[5], CONST[1][6].xyxy, IMM[0].xxxx > 22: RCP TEMP[6].xz, CONST[1][6].xxxx > 23: RCP TEMP[6].yw, CONST[1][6].yyyy > 24: MUL TEMP[6], IMM[3].xwzw, TEMP[6] > 25: UCMP TEMP[5], TEMP[5], IMM[2].wxwx, TEMP[6] > 26: ADD TEMP[5], TEMP[5], TEMP[1].zwzw > 27: FSEQ TEMP[6], CONST[1][6].xyxy, IMM[0].xxxx > 28: RCP TEMP[7].xz, CONST[1][6].xxxx > 29: RCP TEMP[7].yw, CONST[1][6].yyyy > 30: MUL TEMP[7], IMM[3].yxwx, TEMP[7] > 31: UCMP TEMP[6], TEMP[6], IMM[2].xwxw, TEMP[7] > 32: ADD TEMP[6], TEMP[6], TEMP[1].zwzw > 33: FSEQ TEMP[7], CONST[1][6].xyxy, IMM[0].xxxx > 34: RCP TEMP[8].xz, CONST[1][6].xxxx > 35: RCP TEMP[8].yw, CONST[1][6].yyyy > 36: MUL TEMP[8], IMM[3].xxzx, TEMP[8] > 37: UCMP TEMP[7], TEMP[7], IMM[2].wwww, TEMP[8] > 38: ADD TEMP[7], TEMP[7], TEMP[1].zwzw > 39: FSEQ TEMP[8], CONST[1][6].xyxy, IMM[0].xxxx > 40: RCP TEMP[9].xz, CONST[1][6].xxxx > 41: RCP TEMP[9].yw, CONST[1][6].yyyy > 42: MUL TEMP[9], IMM[3].yzwz, TEMP[9] > 43: UCMP TEMP[8], TEMP[8], IMM[2].xwxw, TEMP[9] > 44: ADD TEMP[8], TEMP[8], TEMP[1].zwzw > 45: FSEQ TEMP[9], CONST[1][6].xyxy, IMM[0].xxxx > 46: RCP TEMP[10].xz, CONST[1][6].xxxx > 47: RCP TEMP[10].yw, CONST[1][6].yyyy > 48: MUL TEMP[10], IMM[3].xzzz, TEMP[10] > 49: UCMP TEMP[9], TEMP[9], IMM[2].wwww, TEMP[10] > 50: ADD TEMP[1], TEMP[9], TEMP[1] > 51: MOV OUT[8], TEMP[1] > 52: MOV OUT[7], TEMP[8] > 53: MOV OUT[6], TEMP[7] > 54: MOV OUT[5], TEMP[6] > 55: MOV OUT[4], TEMP[5] > 56: MOV OUT[3], TEMP[4] > 57: MOV OUT[2], TEMP[3] > 58: MOV OUT[1], TEMP[2] > 59: MOV OUT[0], TEMP[0] > 60: END >radeonsi: Compiling shader 37 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > 
%16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %24 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %26 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %27 = fcmp oeq float %16, 0.000000e+00 > %28 = fcmp oeq float %17, 0.000000e+00 > %29 = fcmp oeq float %16, 0.000000e+00 > %30 = fcmp oeq float %17, 0.000000e+00 > %31 = fdiv float 1.000000e+00, %16 > %32 = fdiv float 1.000000e+00, %17 > %33 = fmul float %31, -1.500000e+00 > %34 = fmul float %32, -1.500000e+00 > %35 = fmul float %31, -5.000000e-01 > %36 = fmul float %32, -1.500000e+00 > %37 = select i1 %27, float 0xC600000000000000, float %33 > %38 = select i1 %28, float 0xC600000000000000, float %34 > %39 = select i1 %29, float 0xC600000000000000, float %35 > %40 = select i1 %30, float 0xC600000000000000, float %36 > %41 = fadd float %37, %25 > %42 = fadd float %38, %26 > %43 = fadd float %39, %25 > %44 = fadd float %40, %26 > %45 = fcmp oeq float %16, 0.000000e+00 > %46 = fcmp oeq float %17, 0.000000e+00 > %47 = fcmp oeq float %16, 0.000000e+00 > %48 = fcmp oeq float %17, 0.000000e+00 > %49 = fdiv float 1.000000e+00, %16 > %50 = fdiv float 1.000000e+00, %17 > %51 = fmul float %49, 5.000000e-01 > %52 = fmul float %50, -1.500000e+00 > %53 = fmul float %49, 1.500000e+00 > %54 = fmul float %50, -1.500000e+00 > %55 = select i1 %45, float 0x4600000000000000, float %51 > %56 = select i1 %46, float 0xC600000000000000, float %52 > %57 = select i1 %47, float 0x4600000000000000, float %53 > %58 = select i1 %48, float 0xC600000000000000, float %54 > %59 = fadd float %55, %25 > %60 = fadd float %56, %26 > %61 = fadd float %57, %25 > %62 = fadd float %58, %26 > %63 = fcmp oeq float %16, 0.000000e+00 > %64 = fcmp oeq float %17, 0.000000e+00 > %65 = fcmp oeq float %16, 0.000000e+00 > %66 = fcmp oeq float %17, 0.000000e+00 > %67 = fdiv float 1.000000e+00, %16 > %68 = fdiv float 1.000000e+00, %17 > %69 = fmul float %67, -1.500000e+00 > %70 = fmul float %68, -5.000000e-01 > %71 = fmul float %67, -5.000000e-01 > %72 = fmul float %68, -5.000000e-01 > %73 = select i1 %63, float 0xC600000000000000, float %69 > %74 = select i1 %64, float 0xC600000000000000, float %70 > %75 = select i1 %65, float 0xC600000000000000, float %71 > %76 = select i1 %66, float 0xC600000000000000, float %72 > %77 = fadd float %73, %25 > %78 = fadd float %74, %26 > %79 = fadd float %75, %25 > %80 = fadd float %76, %26 > %81 = fcmp oeq float %16, 0.000000e+00 > %82 = fcmp oeq float %17, 0.000000e+00 > %83 = fcmp oeq float %16, 0.000000e+00 > %84 = fcmp oeq float %17, 0.000000e+00 > %85 = fdiv float 1.000000e+00, %16 > %86 = fdiv float 1.000000e+00, %17 > %87 = fmul float %85, 5.000000e-01 > %88 = fmul float %86, -5.000000e-01 > %89 = fmul float %85, 1.500000e+00 > %90 = fmul float %86, -5.000000e-01 > %91 = select i1 %81, float 0x4600000000000000, float %87 > %92 = select i1 %82, float 0xC600000000000000, float %88 > %93 = select i1 %83, float 
0x4600000000000000, float %89 > %94 = select i1 %84, float 0xC600000000000000, float %90 > %95 = fadd float %91, %25 > %96 = fadd float %92, %26 > %97 = fadd float %93, %25 > %98 = fadd float %94, %26 > %99 = fcmp oeq float %16, 0.000000e+00 > %100 = fcmp oeq float %17, 0.000000e+00 > %101 = fcmp oeq float %16, 0.000000e+00 > %102 = fcmp oeq float %17, 0.000000e+00 > %103 = fdiv float 1.000000e+00, %16 > %104 = fdiv float 1.000000e+00, %17 > %105 = fmul float %103, -1.500000e+00 > %106 = fmul float %104, 5.000000e-01 > %107 = fmul float %103, -5.000000e-01 > %108 = fmul float %104, 5.000000e-01 > %109 = select i1 %99, float 0xC600000000000000, float %105 > %110 = select i1 %100, float 0x4600000000000000, float %106 > %111 = select i1 %101, float 0xC600000000000000, float %107 > %112 = select i1 %102, float 0x4600000000000000, float %108 > %113 = fadd float %109, %25 > %114 = fadd float %110, %26 > %115 = fadd float %111, %25 > %116 = fadd float %112, %26 > %117 = fcmp oeq float %16, 0.000000e+00 > %118 = fcmp oeq float %17, 0.000000e+00 > %119 = fcmp oeq float %16, 0.000000e+00 > %120 = fcmp oeq float %17, 0.000000e+00 > %121 = fdiv float 1.000000e+00, %16 > %122 = fdiv float 1.000000e+00, %17 > %123 = fmul float %121, 5.000000e-01 > %124 = fmul float %122, 5.000000e-01 > %125 = fmul float %121, 1.500000e+00 > %126 = fmul float %122, 5.000000e-01 > %127 = select i1 %117, float 0x4600000000000000, float %123 > %128 = select i1 %118, float 0x4600000000000000, float %124 > %129 = select i1 %119, float 0x4600000000000000, float %125 > %130 = select i1 %120, float 0x4600000000000000, float %126 > %131 = fadd float %127, %25 > %132 = fadd float %128, %26 > %133 = fadd float %129, %25 > %134 = fadd float %130, %26 > %135 = fcmp oeq float %16, 0.000000e+00 > %136 = fcmp oeq float %17, 0.000000e+00 > %137 = fcmp oeq float %16, 0.000000e+00 > %138 = fcmp oeq float %17, 0.000000e+00 > %139 = fdiv float 1.000000e+00, %16 > %140 = fdiv float 1.000000e+00, %17 > %141 = fmul float %139, -1.500000e+00 > %142 = fmul float %140, 1.500000e+00 > %143 = fmul float %139, -5.000000e-01 > %144 = fmul float %140, 1.500000e+00 > %145 = select i1 %135, float 0xC600000000000000, float %141 > %146 = select i1 %136, float 0x4600000000000000, float %142 > %147 = select i1 %137, float 0xC600000000000000, float %143 > %148 = select i1 %138, float 0x4600000000000000, float %144 > %149 = fadd float %145, %25 > %150 = fadd float %146, %26 > %151 = fadd float %147, %25 > %152 = fadd float %148, %26 > %153 = fcmp oeq float %16, 0.000000e+00 > %154 = fcmp oeq float %17, 0.000000e+00 > %155 = fcmp oeq float %16, 0.000000e+00 > %156 = fcmp oeq float %17, 0.000000e+00 > %157 = fdiv float 1.000000e+00, %16 > %158 = fdiv float 1.000000e+00, %17 > %159 = fmul float %157, 5.000000e-01 > %160 = fmul float %158, 1.500000e+00 > %161 = fmul float %157, 1.500000e+00 > %162 = fmul float %158, 1.500000e+00 > %163 = select i1 %153, float 0x4600000000000000, float %159 > %164 = select i1 %154, float 0x4600000000000000, float %160 > %165 = select i1 %155, float 0x4600000000000000, float %161 > %166 = select i1 %156, float 0x4600000000000000, float %162 > %167 = fadd float %163, %23 > %168 = fadd float %164, %24 > %169 = fadd float %165, %25 > %170 = fadd float %166, %26 > %171 = bitcast i32 %11 to float > %172 = insertvalue <{ float, float, float }> undef, float %171, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, 
float %59, float %60, float %61, float %62) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %77, float %78, float %79, float %80) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %96, float %97, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %113, float %114, float %115, float %116) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %131, float %132, float %133, float %134) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %149, float %150, float %151, float %152) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %167, float %168, float %169, float %170) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %172 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL IN[7], GENERIC[7], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 112, 0, 0} >IMM[1] FLT32 { 2.0000, 0.0625, 0.0000, 255.0000} >IMM[2] FLT32 { 0.3000, 0.5900, 0.1100, 1.0000} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 0.0039, 158456325028528675187087900672.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: ADD TEMP[1].x, TEMP[0].wwww, -CONST[1][7].zzzz > 3: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 4: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 5: MOV TEMP[2].xy, IN[0].xyyy > 6: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 7: ADD TEMP[3].x, TEMP[2].wwww, -CONST[1][7].zzzz > 8: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 9: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz > 10: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 11: MOV TEMP[2].xy, IN[1].xyyy > 12: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 13: ADD TEMP[3].x, TEMP[2].wwww, -CONST[1][7].zzzz > 14: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 15: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz > 16: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 17: MOV TEMP[2].xy, IN[1].zwww > 18: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 19: ADD TEMP[3].x, TEMP[2].wwww, -CONST[1][7].zzzz > 20: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 21: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz > 22: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 23: MOV TEMP[2].xy, IN[2].xyyy > 24: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 25: ADD TEMP[3].x, TEMP[2].wwww, -CONST[1][7].zzzz > 26: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 27: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[2].xyzz > 28: FMA TEMP[2].xyz, TEMP[2].xyzz, 
IMM[1].xxxx, TEMP[0].xyzz > 29: FMA TEMP[1].xyz, TEMP[3].xyzz, IMM[1].xxxx, TEMP[1].xyzz > 30: MOV TEMP[4].xy, IN[3].xyyy > 31: TEX TEMP[4], TEMP[4], SAMP[0], 2D > 32: ADD TEMP[5].x, TEMP[4].wwww, -CONST[1][7].zzzz > 33: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 34: FMA TEMP[1].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[1].xyzz > 35: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[4].xyzz > 36: MOV TEMP[2].xy, IN[3].zwww > 37: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 38: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 39: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 40: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 41: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 42: MOV TEMP[2].xy, IN[4].xyyy > 43: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 44: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 45: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 46: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 47: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 48: MOV TEMP[2].xy, IN[4].zwww > 49: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 50: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 51: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 52: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 53: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 54: MOV TEMP[2].xy, IN[5].xyyy > 55: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 56: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 57: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 58: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 59: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 60: MOV TEMP[2].xy, IN[5].zwww > 61: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 62: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 63: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 64: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 65: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 66: MOV TEMP[2].xy, IN[6].xyyy > 67: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 68: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 69: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 70: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 71: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 72: MOV TEMP[2].xy, IN[6].zwww > 73: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 74: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 75: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 76: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 77: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 78: MOV TEMP[2].xy, IN[7].xyyy > 79: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 80: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 81: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 82: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 83: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 84: MOV TEMP[2].xy, IN[7].zwww > 85: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 86: ADD TEMP[4].x, TEMP[2].wwww, -CONST[1][7].zzzz > 87: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 88: FMA TEMP[4].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 89: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 90: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[1].yyyy > 91: MUL TEMP[1].xyz, TEMP[4].xyzz, IMM[1].yyyy > 92: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 93: DP3 TEMP[1].x, TEMP[1].xyzz, IMM[2].xyzz > 94: FSLT TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx > 95: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx > 96: INEG TEMP[1].x, TEMP[1].xxxx > 97: MAX TEMP[4].x, TEMP[2].yyyy, TEMP[2].xxxx > 98: MAX TEMP[4].x, TEMP[2].zzzz, TEMP[4].xxxx > 99: MUL TEMP[4].x, TEMP[4].xxxx, CONST[1][7].yyyy >100: MUL TEMP[4].x, TEMP[4].xxxx, IMM[1].wwww >101: CEIL TEMP[4].x, TEMP[4].xxxx >102: MAX TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww >103: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx >104: MOV TEMP[3].w, 
TEMP[4].xxxx >105: MUL TEMP[4].x, TEMP[4].xxxx, CONST[1][7].xxxx >106: FSEQ TEMP[5].xyz, TEMP[4].xxxx, IMM[1].zzzz >107: SSG TEMP[6].xyz, TEMP[2].xyzz >108: MUL TEMP[6].xyz, IMM[4].yyyy, TEMP[6].xyzz >109: RCP TEMP[4].xyz, TEMP[4].xxxx >110: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz >111: UCMP TEMP[3].xyz, TEMP[5].xyzz, TEMP[6].xyzz, TEMP[2].xyzz >112: MOV TEMP[0].w, IMM[1].zzzz >113: USNE TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx >114: UIF TEMP[2].xxxx :0 >115: MOV TEMP[2].x, TEMP[3].xxxx >116: ELSE :0 >117: MOV TEMP[2].x, TEMP[0].xxxx >118: ENDIF >119: MOV TEMP[2].x, TEMP[2].xxxx >120: USNE TEMP[4].x, TEMP[1].xxxx, IMM[0].xxxx >121: UIF TEMP[4].xxxx :0 >122: MOV TEMP[4].x, TEMP[3].yyyy >123: ELSE :0 >124: MOV TEMP[4].x, TEMP[0].yyyy >125: ENDIF >126: MOV TEMP[2].y, TEMP[4].xxxx >127: USNE TEMP[4].x, TEMP[1].xxxx, IMM[0].xxxx >128: UIF TEMP[4].xxxx :0 >129: MOV TEMP[4].x, TEMP[3].zzzz >130: ELSE :0 >131: MOV TEMP[4].x, TEMP[0].zzzz >132: ENDIF >133: MOV TEMP[2].z, TEMP[4].xxxx >134: USNE TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx >135: UIF TEMP[1].xxxx :0 >136: MOV TEMP[1].x, TEMP[3].wwww >137: ELSE :0 >138: MOV TEMP[1].x, TEMP[0].wwww >139: ENDIF >140: MOV TEMP[2].w, TEMP[1].xxxx >141: MOV OUT[0], TEMP[2] >142: END >radeonsi: Compiling shader 38 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x 
i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %6, <2 x i32> %8) > %67 = bitcast float %39 to i32 > %68 = bitcast float %40 to i32 > %69 = insertelement <2 x i32> undef, i32 %67, i32 0 > %70 = insertelement <2 x i32> %69, i32 %68, i32 1 > %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %72 = extractelement <4 x float> %71, i32 0 > %73 = extractelement <4 x float> %71, i32 1 > %74 = extractelement <4 x float> %71, i32 2 > %75 = extractelement <4 x float> %71, i32 3 > %76 = fsub float %75, %27 > %77 = call float @llvm.AMDGPU.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) > %78 = fmul float %77, %72 > %79 = fmul float %77, %73 > %80 = fmul float %77, %74 > %81 = bitcast float %37 to i32 > %82 = bitcast float %38 to i32 > %83 = insertelement <2 x i32> undef, i32 %81, i32 0 > %84 = insertelement <2 x i32> %83, i32 %82, i32 1 > %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = fsub float %89, %27 > %91 = call float @llvm.AMDGPU.clamp.(float %90, float 0.000000e+00, float 1.000000e+00) > %92 = call float @llvm.fma.f32(float %86, float %91, float %78) > %93 = call float @llvm.fma.f32(float %87, float %91, float %79) > %94 = call float @llvm.fma.f32(float %88, float %91, float %80) > %95 = fadd float %72, %86 > %96 = fadd float %73, %87 > %97 = fadd float %74, %88 > %98 = bitcast float %41 to i32 > %99 = bitcast float %42 to i32 > %100 = insertelement <2 x i32> undef, i32 %98, i32 0 > %101 = insertelement <2 x i32> %100, i32 %99, i32 1 > %102 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %101, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %103 = extractelement <4 x float> %102, i32 0 > %104 = extractelement <4 x float> %102, i32 1 > %105 = 
extractelement <4 x float> %102, i32 2 > %106 = extractelement <4 x float> %102, i32 3 > %107 = fsub float %106, %27 > %108 = call float @llvm.AMDGPU.clamp.(float %107, float 0.000000e+00, float 1.000000e+00) > %109 = call float @llvm.fma.f32(float %103, float %108, float %92) > %110 = call float @llvm.fma.f32(float %104, float %108, float %93) > %111 = call float @llvm.fma.f32(float %105, float %108, float %94) > %112 = fadd float %95, %103 > %113 = fadd float %96, %104 > %114 = fadd float %97, %105 > %115 = bitcast float %43 to i32 > %116 = bitcast float %44 to i32 > %117 = insertelement <2 x i32> undef, i32 %115, i32 0 > %118 = insertelement <2 x i32> %117, i32 %116, i32 1 > %119 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %118, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %120 = extractelement <4 x float> %119, i32 0 > %121 = extractelement <4 x float> %119, i32 1 > %122 = extractelement <4 x float> %119, i32 2 > %123 = extractelement <4 x float> %119, i32 3 > %124 = fsub float %123, %27 > %125 = call float @llvm.AMDGPU.clamp.(float %124, float 0.000000e+00, float 1.000000e+00) > %126 = call float @llvm.fma.f32(float %120, float %125, float %109) > %127 = call float @llvm.fma.f32(float %121, float %125, float %110) > %128 = call float @llvm.fma.f32(float %122, float %125, float %111) > %129 = fadd float %112, %120 > %130 = fadd float %113, %121 > %131 = fadd float %114, %122 > %132 = bitcast float %45 to i32 > %133 = bitcast float %46 to i32 > %134 = insertelement <2 x i32> undef, i32 %132, i32 0 > %135 = insertelement <2 x i32> %134, i32 %133, i32 1 > %136 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %135, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %137 = extractelement <4 x float> %136, i32 0 > %138 = extractelement <4 x float> %136, i32 1 > %139 = extractelement <4 x float> %136, i32 2 > %140 = extractelement <4 x float> %136, i32 3 > %141 = fsub float %140, %27 > %142 = call float @llvm.AMDGPU.clamp.(float %141, float 0.000000e+00, float 1.000000e+00) > %143 = fmul float %142, %137 > %144 = fmul float %142, %138 > %145 = fmul float %142, %139 > %146 = call float @llvm.fma.f32(float %137, float 2.000000e+00, float %129) > %147 = call float @llvm.fma.f32(float %138, float 2.000000e+00, float %130) > %148 = call float @llvm.fma.f32(float %139, float 2.000000e+00, float %131) > %149 = call float @llvm.fma.f32(float %143, float 2.000000e+00, float %126) > %150 = call float @llvm.fma.f32(float %144, float 2.000000e+00, float %127) > %151 = call float @llvm.fma.f32(float %145, float 2.000000e+00, float %128) > %152 = bitcast float %47 to i32 > %153 = bitcast float %48 to i32 > %154 = insertelement <2 x i32> undef, i32 %152, i32 0 > %155 = insertelement <2 x i32> %154, i32 %153, i32 1 > %156 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %155, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %157 = extractelement <4 x float> %156, i32 0 > %158 = extractelement <4 x float> %156, i32 1 > %159 = extractelement <4 x float> %156, i32 2 > %160 = extractelement <4 x float> %156, i32 3 > %161 = fsub float %160, %27 > %162 = call float @llvm.AMDGPU.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) > %163 = call float @llvm.fma.f32(float %157, float %162, float %149) > %164 = call float @llvm.fma.f32(float %158, float %162, float %150) > %165 = call float @llvm.fma.f32(float %159, float %162, float %151) > %166 = fadd float 
%146, %157 > %167 = fadd float %147, %158 > %168 = fadd float %148, %159 > %169 = bitcast float %49 to i32 > %170 = bitcast float %50 to i32 > %171 = insertelement <2 x i32> undef, i32 %169, i32 0 > %172 = insertelement <2 x i32> %171, i32 %170, i32 1 > %173 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %172, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %174 = extractelement <4 x float> %173, i32 0 > %175 = extractelement <4 x float> %173, i32 1 > %176 = extractelement <4 x float> %173, i32 2 > %177 = extractelement <4 x float> %173, i32 3 > %178 = fsub float %177, %27 > %179 = call float @llvm.AMDGPU.clamp.(float %178, float 0.000000e+00, float 1.000000e+00) > %180 = call float @llvm.fma.f32(float %174, float %179, float %163) > %181 = call float @llvm.fma.f32(float %175, float %179, float %164) > %182 = call float @llvm.fma.f32(float %176, float %179, float %165) > %183 = fadd float %166, %174 > %184 = fadd float %167, %175 > %185 = fadd float %168, %176 > %186 = bitcast float %51 to i32 > %187 = bitcast float %52 to i32 > %188 = insertelement <2 x i32> undef, i32 %186, i32 0 > %189 = insertelement <2 x i32> %188, i32 %187, i32 1 > %190 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %189, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %191 = extractelement <4 x float> %190, i32 0 > %192 = extractelement <4 x float> %190, i32 1 > %193 = extractelement <4 x float> %190, i32 2 > %194 = extractelement <4 x float> %190, i32 3 > %195 = fsub float %194, %27 > %196 = call float @llvm.AMDGPU.clamp.(float %195, float 0.000000e+00, float 1.000000e+00) > %197 = call float @llvm.fma.f32(float %191, float %196, float %180) > %198 = call float @llvm.fma.f32(float %192, float %196, float %181) > %199 = call float @llvm.fma.f32(float %193, float %196, float %182) > %200 = fadd float %183, %191 > %201 = fadd float %184, %192 > %202 = fadd float %185, %193 > %203 = bitcast float %53 to i32 > %204 = bitcast float %54 to i32 > %205 = insertelement <2 x i32> undef, i32 %203, i32 0 > %206 = insertelement <2 x i32> %205, i32 %204, i32 1 > %207 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %206, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %208 = extractelement <4 x float> %207, i32 0 > %209 = extractelement <4 x float> %207, i32 1 > %210 = extractelement <4 x float> %207, i32 2 > %211 = extractelement <4 x float> %207, i32 3 > %212 = fsub float %211, %27 > %213 = call float @llvm.AMDGPU.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) > %214 = call float @llvm.fma.f32(float %208, float %213, float %197) > %215 = call float @llvm.fma.f32(float %209, float %213, float %198) > %216 = call float @llvm.fma.f32(float %210, float %213, float %199) > %217 = fadd float %200, %208 > %218 = fadd float %201, %209 > %219 = fadd float %202, %210 > %220 = bitcast float %55 to i32 > %221 = bitcast float %56 to i32 > %222 = insertelement <2 x i32> undef, i32 %220, i32 0 > %223 = insertelement <2 x i32> %222, i32 %221, i32 1 > %224 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %223, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %225 = extractelement <4 x float> %224, i32 0 > %226 = extractelement <4 x float> %224, i32 1 > %227 = extractelement <4 x float> %224, i32 2 > %228 = extractelement <4 x float> %224, i32 3 > %229 = fsub float %228, %27 > %230 = call float @llvm.AMDGPU.clamp.(float %229, float 
0.000000e+00, float 1.000000e+00) > %231 = call float @llvm.fma.f32(float %225, float %230, float %214) > %232 = call float @llvm.fma.f32(float %226, float %230, float %215) > %233 = call float @llvm.fma.f32(float %227, float %230, float %216) > %234 = fadd float %217, %225 > %235 = fadd float %218, %226 > %236 = fadd float %219, %227 > %237 = bitcast float %57 to i32 > %238 = bitcast float %58 to i32 > %239 = insertelement <2 x i32> undef, i32 %237, i32 0 > %240 = insertelement <2 x i32> %239, i32 %238, i32 1 > %241 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %240, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %242 = extractelement <4 x float> %241, i32 0 > %243 = extractelement <4 x float> %241, i32 1 > %244 = extractelement <4 x float> %241, i32 2 > %245 = extractelement <4 x float> %241, i32 3 > %246 = fsub float %245, %27 > %247 = call float @llvm.AMDGPU.clamp.(float %246, float 0.000000e+00, float 1.000000e+00) > %248 = call float @llvm.fma.f32(float %242, float %247, float %231) > %249 = call float @llvm.fma.f32(float %243, float %247, float %232) > %250 = call float @llvm.fma.f32(float %244, float %247, float %233) > %251 = fadd float %234, %242 > %252 = fadd float %235, %243 > %253 = fadd float %236, %244 > %254 = bitcast float %59 to i32 > %255 = bitcast float %60 to i32 > %256 = insertelement <2 x i32> undef, i32 %254, i32 0 > %257 = insertelement <2 x i32> %256, i32 %255, i32 1 > %258 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %257, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %259 = extractelement <4 x float> %258, i32 0 > %260 = extractelement <4 x float> %258, i32 1 > %261 = extractelement <4 x float> %258, i32 2 > %262 = extractelement <4 x float> %258, i32 3 > %263 = fsub float %262, %27 > %264 = call float @llvm.AMDGPU.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) > %265 = call float @llvm.fma.f32(float %259, float %264, float %248) > %266 = call float @llvm.fma.f32(float %260, float %264, float %249) > %267 = call float @llvm.fma.f32(float %261, float %264, float %250) > %268 = fadd float %251, %259 > %269 = fadd float %252, %260 > %270 = fadd float %253, %261 > %271 = bitcast float %61 to i32 > %272 = bitcast float %62 to i32 > %273 = insertelement <2 x i32> undef, i32 %271, i32 0 > %274 = insertelement <2 x i32> %273, i32 %272, i32 1 > %275 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %274, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %276 = extractelement <4 x float> %275, i32 0 > %277 = extractelement <4 x float> %275, i32 1 > %278 = extractelement <4 x float> %275, i32 2 > %279 = extractelement <4 x float> %275, i32 3 > %280 = fsub float %279, %27 > %281 = call float @llvm.AMDGPU.clamp.(float %280, float 0.000000e+00, float 1.000000e+00) > %282 = call float @llvm.fma.f32(float %276, float %281, float %265) > %283 = call float @llvm.fma.f32(float %277, float %281, float %266) > %284 = call float @llvm.fma.f32(float %278, float %281, float %267) > %285 = fadd float %268, %276 > %286 = fadd float %269, %277 > %287 = fadd float %270, %278 > %288 = bitcast float %63 to i32 > %289 = bitcast float %64 to i32 > %290 = insertelement <2 x i32> undef, i32 %288, i32 0 > %291 = insertelement <2 x i32> %290, i32 %289, i32 1 > %292 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %291, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %293 = 
extractelement <4 x float> %292, i32 0 > %294 = extractelement <4 x float> %292, i32 1 > %295 = extractelement <4 x float> %292, i32 2 > %296 = extractelement <4 x float> %292, i32 3 > %297 = fsub float %296, %27 > %298 = call float @llvm.AMDGPU.clamp.(float %297, float 0.000000e+00, float 1.000000e+00) > %299 = call float @llvm.fma.f32(float %293, float %298, float %282) > %300 = call float @llvm.fma.f32(float %294, float %298, float %283) > %301 = call float @llvm.fma.f32(float %295, float %298, float %284) > %302 = fadd float %285, %293 > %303 = fadd float %286, %294 > %304 = fadd float %287, %295 > %305 = bitcast float %65 to i32 > %306 = bitcast float %66 to i32 > %307 = insertelement <2 x i32> undef, i32 %305, i32 0 > %308 = insertelement <2 x i32> %307, i32 %306, i32 1 > %309 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %308, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 1 > %312 = extractelement <4 x float> %309, i32 2 > %313 = extractelement <4 x float> %309, i32 3 > %314 = fsub float %313, %27 > %315 = call float @llvm.AMDGPU.clamp.(float %314, float 0.000000e+00, float 1.000000e+00) > %316 = call float @llvm.fma.f32(float %310, float %315, float %299) > %317 = call float @llvm.fma.f32(float %311, float %315, float %300) > %318 = call float @llvm.fma.f32(float %312, float %315, float %301) > %319 = fadd float %302, %310 > %320 = fadd float %303, %311 > %321 = fadd float %304, %312 > %322 = fmul float %319, 6.250000e-02 > %323 = fmul float %320, 6.250000e-02 > %324 = fmul float %321, 6.250000e-02 > %325 = fmul float %316, 6.250000e-02 > %326 = fmul float %317, 6.250000e-02 > %327 = fmul float %318, 6.250000e-02 > %328 = fmul float %325, %25 > %329 = fmul float %326, %25 > %330 = fmul float %327, %25 > %331 = fmul float %325, 0x3FD3333340000000 > %332 = fmul float %326, 0x3FE2E147A0000000 > %333 = fadd float %332, %331 > %334 = fmul float %327, 0x3FBC28F5C0000000 > %335 = fadd float %333, %334 > %336 = fcmp ogt float %335, 0.000000e+00 > %337 = call float @llvm.maxnum.f32(float %329, float %328) > %338 = call float @llvm.maxnum.f32(float %330, float %337) > %339 = fmul float %338, %26 > %340 = fmul float %339, 2.550000e+02 > %341 = call float @llvm.ceil.f32(float %340) > %342 = call float @llvm.maxnum.f32(float %341, float 1.000000e+00) > %343 = fmul float %342, 0x3F70101020000000 > %344 = fmul float %343, %25 > %345 = fcmp oeq float %344, 0.000000e+00 > %346 = fcmp oeq float %344, 0.000000e+00 > %347 = fcmp oeq float %344, 0.000000e+00 > %348 = fcmp ogt float %328, 0.000000e+00 > %349 = select i1 %348, float 1.000000e+00, float %328 > %350 = fcmp oge float %349, 0.000000e+00 > %351 = fcmp ogt float %329, 0.000000e+00 > %352 = select i1 %351, float 1.000000e+00, float %329 > %353 = fcmp oge float %352, 0.000000e+00 > %354 = fcmp ogt float %330, 0.000000e+00 > %355 = select i1 %354, float 1.000000e+00, float %330 > %356 = fcmp oge float %355, 0.000000e+00 > %.op = fmul float %349, 0x4600000000000000 > %357 = select i1 %350, float %.op, float 0xC600000000000000 > %.op38 = fmul float %352, 0x4600000000000000 > %358 = select i1 %353, float %.op38, float 0xC600000000000000 > %.op39 = fmul float %355, 0x4600000000000000 > %359 = select i1 %356, float %.op39, float 0xC600000000000000 > %360 = fdiv float 1.000000e+00, %344 > %361 = fmul float %328, %360 > %362 = fmul float %329, %360 > %363 = fmul float %330, %360 > %364 = select i1 %345, float 
%357, float %361 > %365 = select i1 %346, float %358, float %362 > %366 = select i1 %347, float %359, float %363 > %. = select i1 %336, float %364, float %322 > %temp16.0 = select i1 %336, float %365, float %323 > %.37 = select i1 %336, float %366, float %324 > %temp4.0 = select i1 %336, float %343, float 0.000000e+00 > %367 = bitcast float %5 to i32 > %368 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %367, 10 > %369 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %368, float %., 11 > %370 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %369, float %temp16.0, 12 > %371 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %370, float %.37, 13 > %372 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %371, float %temp4.0, 14 > %373 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %372, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %373 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0], LOCAL >IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D > 2: MOV OUT[0], IMM[0].xxxx > 3: MOV OUT[1], TEMP[0].wwww > 4: END >radeonsi: Compiling shader 39 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] 
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = bitcast float %32 to i32 > %35 = bitcast float %33 to i32 > %36 = insertelement <2 x i32> undef, i32 %34, i32 0 > %37 = insertelement <2 x i32> %36, i32 %35, i32 1 > %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %39 = extractelement <4 x float> %38, i32 3 > %40 = bitcast float %5 to i32 > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %40, 10 > %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float 0.000000e+00, 11 > %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float 0.000000e+00, 12 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float 0.000000e+00, 13 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float 0.000000e+00, 14 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %39, 15 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %39, 16 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %39, 17 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %39, 18 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, 
float, float, float, float, float, float, float, float, float, float, float, float }> %50 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL OUT[8], GENERIC[7] >DCL CONST[1][0..6] >DCL TEMP[0..9], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} >IMM[2] FLT32 {158456325028528675187087900672.0000, -1.5000, -0.5000, 0.5000} >IMM[3] FLT32 { 0.5000, -1.5000, 1.5000, -0.5000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1], IN[0].xyxy, IMM[0].zwzw, IMM[0].zzzz > 3: FSEQ TEMP[2], CONST[1][6].xyxy, IMM[0].xxxx > 4: RCP TEMP[3].xz, CONST[1][6].xxxx > 5: RCP TEMP[3].yw, CONST[1][6].yyyy > 6: UCMP TEMP[2], TEMP[2], IMM[2].xxxx, TEMP[3] > 7: FMA TEMP[3], TEMP[2].zwzw, IMM[2].yyzy, TEMP[1].zwzw > 8: FMA TEMP[4], TEMP[2].zwzw, IMM[3].xyzy, TEMP[1].zwzw > 9: FMA TEMP[5], TEMP[2].zwzw, IMM[2].yzzz, TEMP[1].zwzw > 10: FMA TEMP[6], TEMP[2].zwzw, IMM[3].xwzw, TEMP[1].zwzw > 11: FMA TEMP[7], TEMP[2].zwzw, IMM[2].ywzw, TEMP[1].zwzw > 12: FMA TEMP[8], TEMP[2].zwzw, IMM[3].xxzx, TEMP[1].zwzw > 13: FMA TEMP[9], TEMP[2].zwzw, IMM[3].yzwz, TEMP[1].zwzw > 14: FMA TEMP[1], TEMP[2], IMM[3].xzzz, TEMP[1] > 15: MOV OUT[8], TEMP[1] > 16: MOV OUT[7], TEMP[9] > 17: MOV OUT[6], TEMP[8] > 18: MOV OUT[5], TEMP[7] > 19: MOV OUT[4], TEMP[6] > 20: MOV OUT[3], TEMP[5] > 21: MOV OUT[2], TEMP[4] > 22: MOV OUT[1], TEMP[3] > 23: MOV OUT[0], TEMP[0] > 24: END >radeonsi: Compiling shader 40 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %24 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %26 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > 
%27 = fcmp oeq float %16, 0.000000e+00 > %28 = fcmp oeq float %17, 0.000000e+00 > %29 = fcmp oeq float %16, 0.000000e+00 > %30 = fcmp oeq float %17, 0.000000e+00 > %31 = fdiv float 1.000000e+00, %16 > %32 = fdiv float 1.000000e+00, %17 > %33 = select i1 %27, float 0x4600000000000000, float %31 > %34 = select i1 %28, float 0x4600000000000000, float %32 > %35 = select i1 %29, float 0x4600000000000000, float %31 > %36 = select i1 %30, float 0x4600000000000000, float %32 > %37 = call float @llvm.fma.f32(float %35, float -1.500000e+00, float %25) > %38 = call float @llvm.fma.f32(float %36, float -1.500000e+00, float %26) > %39 = call float @llvm.fma.f32(float %35, float -5.000000e-01, float %25) > %40 = call float @llvm.fma.f32(float %36, float -1.500000e+00, float %26) > %41 = call float @llvm.fma.f32(float %35, float 5.000000e-01, float %25) > %42 = call float @llvm.fma.f32(float %36, float -1.500000e+00, float %26) > %43 = call float @llvm.fma.f32(float %35, float 1.500000e+00, float %25) > %44 = call float @llvm.fma.f32(float %36, float -1.500000e+00, float %26) > %45 = call float @llvm.fma.f32(float %35, float -1.500000e+00, float %25) > %46 = call float @llvm.fma.f32(float %36, float -5.000000e-01, float %26) > %47 = call float @llvm.fma.f32(float %35, float -5.000000e-01, float %25) > %48 = call float @llvm.fma.f32(float %36, float -5.000000e-01, float %26) > %49 = call float @llvm.fma.f32(float %35, float 5.000000e-01, float %25) > %50 = call float @llvm.fma.f32(float %36, float -5.000000e-01, float %26) > %51 = call float @llvm.fma.f32(float %35, float 1.500000e+00, float %25) > %52 = call float @llvm.fma.f32(float %36, float -5.000000e-01, float %26) > %53 = call float @llvm.fma.f32(float %35, float -1.500000e+00, float %25) > %54 = call float @llvm.fma.f32(float %36, float 5.000000e-01, float %26) > %55 = call float @llvm.fma.f32(float %35, float -5.000000e-01, float %25) > %56 = call float @llvm.fma.f32(float %36, float 5.000000e-01, float %26) > %57 = call float @llvm.fma.f32(float %35, float 5.000000e-01, float %25) > %58 = call float @llvm.fma.f32(float %36, float 5.000000e-01, float %26) > %59 = call float @llvm.fma.f32(float %35, float 1.500000e+00, float %25) > %60 = call float @llvm.fma.f32(float %36, float 5.000000e-01, float %26) > %61 = call float @llvm.fma.f32(float %35, float -1.500000e+00, float %25) > %62 = call float @llvm.fma.f32(float %36, float 1.500000e+00, float %26) > %63 = call float @llvm.fma.f32(float %35, float -5.000000e-01, float %25) > %64 = call float @llvm.fma.f32(float %36, float 1.500000e+00, float %26) > %65 = call float @llvm.fma.f32(float %33, float 5.000000e-01, float %23) > %66 = call float @llvm.fma.f32(float %34, float 1.500000e+00, float %24) > %67 = call float @llvm.fma.f32(float %35, float 1.500000e+00, float %25) > %68 = call float @llvm.fma.f32(float %36, float 1.500000e+00, float %26) > %69 = bitcast i32 %11 to float > %70 = insertvalue <{ float, float, float }> undef, float %69, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %37, float %38, float %39, float %40) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %45, float %46, float %47, float %48) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %49, float %50, float %51, float %52) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %53, float %54, float %55, float %56) > call void 
@llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %57, float %58, float %59, float %60) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %61, float %62, float %63, float %64) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %65, float %66, float %67, float %68) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %70 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL IN[7], GENERIC[7], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0625, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: MOV TEMP[0].x, TEMP[0].xxxx > 3: MOV TEMP[1].xy, IN[0].zwww > 4: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 5: MOV TEMP[0].y, TEMP[1].xxxx > 6: MOV TEMP[1].xy, IN[1].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: MOV TEMP[0].z, TEMP[1].xxxx > 9: MOV TEMP[1].xy, IN[1].zwww > 10: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 11: MOV TEMP[0].w, TEMP[1].xxxx > 12: MOV TEMP[1].xy, IN[2].xyyy > 13: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 14: MOV TEMP[1].x, TEMP[1].xxxx > 15: MOV TEMP[2].xy, IN[2].zwww > 16: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 17: MOV TEMP[1].y, TEMP[2].xxxx > 18: MOV TEMP[2].xy, IN[3].xyyy > 19: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 20: MOV TEMP[1].z, TEMP[2].xxxx > 21: MOV TEMP[2].xy, IN[3].zwww > 22: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 23: MOV TEMP[1].w, TEMP[2].xxxx > 24: ADD TEMP[0], TEMP[0], TEMP[1] > 25: MOV TEMP[2].xy, IN[4].xyyy > 26: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 27: MOV TEMP[1].x, TEMP[2].xxxx > 28: MOV TEMP[2].xy, IN[4].zwww > 29: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 30: MOV TEMP[1].y, TEMP[2].xxxx > 31: MOV TEMP[2].xy, IN[5].xyyy > 32: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 33: MOV TEMP[1].z, TEMP[2].xxxx > 34: MOV TEMP[2].xy, IN[5].zwww > 35: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 36: MOV TEMP[1].w, TEMP[2].xxxx > 37: ADD TEMP[0], TEMP[0], TEMP[1] > 38: MOV TEMP[2].xy, IN[6].xyyy > 39: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 40: MOV TEMP[1].x, TEMP[2].xxxx > 41: MOV TEMP[2].xy, IN[6].zwww > 42: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 43: MOV TEMP[1].y, TEMP[2].xxxx > 44: MOV TEMP[2].xy, IN[7].xyyy > 45: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 46: MOV TEMP[1].z, TEMP[2].xxxx > 47: MOV TEMP[2].xy, IN[7].zwww > 48: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 49: MOV TEMP[1].w, TEMP[2].xxxx > 50: ADD TEMP[0], TEMP[0], TEMP[1] > 51: DP4 TEMP[0].x, TEMP[0], 
IMM[0].xxxx > 52: MOV OUT[0], TEMP[0].xxxx > 53: MOV OUT[1], IMM[0].yyyy > 54: END >radeonsi: Compiling shader 41 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %6, <2 x i32> %8) > %62 
= call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %6, <2 x i32> %8) > %64 = bitcast float %32 to i32 > %65 = bitcast float %33 to i32 > %66 = insertelement <2 x i32> undef, i32 %64, i32 0 > %67 = insertelement <2 x i32> %66, i32 %65, i32 1 > %68 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %67, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %69 = extractelement <4 x float> %68, i32 0 > %70 = bitcast float %34 to i32 > %71 = bitcast float %35 to i32 > %72 = insertelement <2 x i32> undef, i32 %70, i32 0 > %73 = insertelement <2 x i32> %72, i32 %71, i32 1 > %74 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %73, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %75 = extractelement <4 x float> %74, i32 0 > %76 = bitcast float %36 to i32 > %77 = bitcast float %37 to i32 > %78 = insertelement <2 x i32> undef, i32 %76, i32 0 > %79 = insertelement <2 x i32> %78, i32 %77, i32 1 > %80 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %79, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %81 = extractelement <4 x float> %80, i32 0 > %82 = bitcast float %38 to i32 > %83 = bitcast float %39 to i32 > %84 = insertelement <2 x i32> undef, i32 %82, i32 0 > %85 = insertelement <2 x i32> %84, i32 %83, i32 1 > %86 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %85, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %87 = extractelement <4 x float> %86, i32 0 > %88 = bitcast float %40 to i32 > %89 = bitcast float %41 to i32 > %90 = insertelement <2 x i32> undef, i32 %88, i32 0 > %91 = insertelement <2 x i32> %90, i32 %89, i32 1 > %92 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %91, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %93 = extractelement <4 x float> %92, i32 0 > %94 = bitcast float %42 to i32 > %95 = bitcast float %43 to i32 > %96 = insertelement <2 x i32> undef, i32 %94, i32 0 > %97 = insertelement <2 x i32> %96, i32 %95, i32 1 > %98 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %97, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %99 = extractelement <4 x float> %98, i32 0 > %100 = bitcast float %44 to i32 > %101 = bitcast float %45 to i32 > %102 = insertelement <2 x i32> undef, i32 %100, i32 0 > %103 = insertelement <2 x i32> %102, i32 %101, i32 1 > %104 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %103, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %105 = extractelement <4 x float> %104, i32 0 > %106 = bitcast float %46 to i32 > %107 = bitcast float %47 to i32 > %108 = insertelement <2 x i32> undef, i32 %106, i32 0 > %109 = insertelement <2 x i32> %108, i32 %107, i32 1 > %110 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %109, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %111 = extractelement <4 x float> %110, i32 0 > %112 = fadd float %69, %93 > %113 = fadd float %75, %99 > %114 = fadd float %81, %105 > %115 = fadd float %87, %111 > %116 = bitcast float %48 to i32 > %117 = bitcast float %49 to i32 > %118 = insertelement <2 x i32> undef, i32 %116, i32 0 > %119 = insertelement <2 x i32> %118, i32 %117, i32 1 > %120 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %119, <8 x i32> %24, <4 x i32> %31, i32 15, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %121 = extractelement <4 x float> %120, i32 0 > %122 = bitcast float %50 to i32 > %123 = bitcast float %51 to i32 > %124 = insertelement <2 x i32> undef, i32 %122, i32 0 > %125 = insertelement <2 x i32> %124, i32 %123, i32 1 > %126 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %125, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %127 = extractelement <4 x float> %126, i32 0 > %128 = bitcast float %52 to i32 > %129 = bitcast float %53 to i32 > %130 = insertelement <2 x i32> undef, i32 %128, i32 0 > %131 = insertelement <2 x i32> %130, i32 %129, i32 1 > %132 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %131, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %133 = extractelement <4 x float> %132, i32 0 > %134 = bitcast float %54 to i32 > %135 = bitcast float %55 to i32 > %136 = insertelement <2 x i32> undef, i32 %134, i32 0 > %137 = insertelement <2 x i32> %136, i32 %135, i32 1 > %138 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %137, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %139 = extractelement <4 x float> %138, i32 0 > %140 = fadd float %112, %121 > %141 = fadd float %113, %127 > %142 = fadd float %114, %133 > %143 = fadd float %115, %139 > %144 = bitcast float %56 to i32 > %145 = bitcast float %57 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 0 > %150 = bitcast float %58 to i32 > %151 = bitcast float %59 to i32 > %152 = insertelement <2 x i32> undef, i32 %150, i32 0 > %153 = insertelement <2 x i32> %152, i32 %151, i32 1 > %154 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %153, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %155 = extractelement <4 x float> %154, i32 0 > %156 = bitcast float %60 to i32 > %157 = bitcast float %61 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> %160, i32 0 > %162 = bitcast float %62 to i32 > %163 = bitcast float %63 to i32 > %164 = insertelement <2 x i32> undef, i32 %162, i32 0 > %165 = insertelement <2 x i32> %164, i32 %163, i32 1 > %166 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %165, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %167 = extractelement <4 x float> %166, i32 0 > %168 = fadd float %140, %149 > %169 = fadd float %141, %155 > %170 = fadd float %142, %161 > %171 = fadd float %143, %167 > %172 = fmul float %168, 6.250000e-02 > %173 = fmul float %169, 6.250000e-02 > %174 = fadd float %172, %173 > %175 = fmul float %170, 6.250000e-02 > %176 = fadd float %174, %175 > %177 = fmul float %171, 6.250000e-02 > %178 = fadd float %176, %177 > %179 = bitcast float %5 to i32 > %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %179, 10 > %181 = insertvalue <{ i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %178, 11 > %182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %178, 12 > %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %182, float %178, 13 > %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %178, 14 > %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float 0.000000e+00, 15 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float 0.000000e+00, 16 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float 0.000000e+00, 17 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float 0.000000e+00, 18 > %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..17] >DCL CONST[2][0..4095] >DCL TEMP[0..23], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.0000} >IMM[1] FLT32 { 255.0020, 0.0000, 0.0000, 0.0000} >IMM[2] INT32 {1, 2, 4, 0} >IMM[3] UINT32 {1, 16, 0, 240} >IMM[4] UINT32 {256, 272, 0, 0} > 0: FMA TEMP[0].xy, IN[1].xyyy, IMM[0].xyyy, IMM[0].zxxx > 1: FMA TEMP[1].xy, TEMP[0].xyyy, IMM[0].wwww, IMM[0].yyyy > 2: MOV TEMP[1].zw, IMM[0].xxxx > 3: MOV TEMP[2].xy, IN[1].xyxx > 4: MUL TEMP[0].xyz, IN[6].zyxx, IMM[1].xxxx > 5: F2I TEMP[3].xyz, TEMP[0].xyzz > 6: SHL TEMP[4].xyz, TEMP[3].xyzz, IMM[2].xxxx > 7: UMAD TEMP[5].xyz, TEMP[3].xyzz, IMM[2].yyyy, IMM[2].xxxx > 8: UMUL TEMP[6].x, TEMP[4].xxxx, IMM[3].yyyy > 9: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz > 10: UARL ADDR[0].x, TEMP[7].xxxx > 11: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 12: MUL TEMP[6].x, IN[5].xxxx, TEMP[6].yyyy > 13: MOV 
TEMP[6].w, TEMP[6].xxxx > 14: UMUL TEMP[7].x, TEMP[4].yyyy, IMM[3].yyyy > 15: USHR TEMP[8].x, TEMP[7].xxxx, IMM[2].zzzz > 16: UARL ADDR[0].x, TEMP[8].xxxx > 17: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 18: MUL TEMP[7].x, IN[5].yyyy, TEMP[7].yyyy > 19: MOV TEMP[7].w, TEMP[7].xxxx > 20: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[3].yyyy > 21: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 22: UARL ADDR[0].x, TEMP[9].xxxx > 23: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 24: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[3].yyyy > 25: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 26: UARL ADDR[0].x, TEMP[10].xxxx > 27: MOV TEMP[9].w, CONST[2][ADDR[0].x] > 28: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].wwww > 29: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[3].yyyy > 30: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 31: UARL ADDR[0].x, TEMP[10].xxxx > 32: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 33: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[3].yyyy > 34: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 35: UARL ADDR[0].x, TEMP[11].xxxx > 36: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 37: FMA TEMP[9].x, TEMP[9].yyyy, TEMP[10].zzzz, -TEMP[8].xxxx > 38: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[3].yyyy > 39: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 40: UARL ADDR[0].x, TEMP[11].xxxx > 41: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 42: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[3].yyyy > 43: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz > 44: UARL ADDR[0].x, TEMP[12].xxxx > 45: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 46: FMA TEMP[8].x, TEMP[10].yyyy, TEMP[11].zzzz, TEMP[8].xxxx > 47: MUL TEMP[8].x, TEMP[8].xxxx, IN[5].xxxx > 48: MUL TEMP[9].x, TEMP[9].xxxx, IN[5].xxxx > 49: MUL TEMP[9].x, TEMP[9].xxxx, IMM[0].wwww > 50: MOV TEMP[6].z, TEMP[9].xxxx > 51: UMUL TEMP[9].x, TEMP[5].yyyy, IMM[3].yyyy > 52: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 53: UARL ADDR[0].x, TEMP[10].xxxx > 54: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 55: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[3].yyyy > 56: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 57: UARL ADDR[0].x, TEMP[11].xxxx > 58: MOV TEMP[10].w, CONST[2][ADDR[0].x] > 59: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].wwww > 60: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[3].yyyy > 61: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 62: UARL ADDR[0].x, TEMP[11].xxxx > 63: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 64: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[3].yyyy > 65: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz > 66: UARL ADDR[0].x, TEMP[12].xxxx > 67: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 68: FMA TEMP[10].x, TEMP[10].yyyy, TEMP[11].zzzz, -TEMP[9].xxxx > 69: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[3].yyyy > 70: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz > 71: UARL ADDR[0].x, TEMP[12].xxxx > 72: MOV TEMP[11].y, CONST[2][ADDR[0].x] > 73: UMUL TEMP[12].x, TEMP[5].yyyy, IMM[3].yyyy > 74: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz > 75: UARL ADDR[0].x, TEMP[13].xxxx > 76: MOV TEMP[12].z, CONST[2][ADDR[0].x] > 77: FMA TEMP[9].x, TEMP[11].yyyy, TEMP[12].zzzz, TEMP[9].xxxx > 78: MUL TEMP[9].x, TEMP[9].xxxx, IN[5].yyyy > 79: MUL TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx > 80: MOV TEMP[9].y, TEMP[9].xxxx > 81: MUL TEMP[10].x, TEMP[10].xxxx, IN[5].yyyy > 82: MUL TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx > 83: MOV TEMP[7].z, TEMP[10].xxxx > 84: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[3].yyyy > 85: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 86: UARL ADDR[0].x, TEMP[11].xxxx > 87: MOV TEMP[10].yz, CONST[2][ADDR[0].x] > 88: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[3].yyyy > 89: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz > 90: UARL ADDR[0].x, TEMP[12].xxxx > 91: MOV TEMP[11].xw, CONST[2][ADDR[0].x] > 92: MUL TEMP[10].xyz, 
TEMP[10].zzyy, TEMP[11].wxww > 93: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[3].yyyy > 94: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz > 95: UARL ADDR[0].x, TEMP[12].xxxx > 96: MOV TEMP[11].x, CONST[2][ADDR[0].x] > 97: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[3].yyyy > 98: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz > 99: UARL ADDR[0].x, TEMP[13].xxxx >100: MOV TEMP[12].y, CONST[2][ADDR[0].x] >101: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >102: MUL TEMP[11].x, TEMP[11].xxxx, IN[5].xxxx >103: MUL TEMP[6].x, IMM[0].wwww, TEMP[11].xxxx >104: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[3].yyyy >105: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz >106: UARL ADDR[0].x, TEMP[12].xxxx >107: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >108: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[3].yyyy >109: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz >110: UARL ADDR[0].x, TEMP[13].xxxx >111: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >112: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >113: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >114: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].wwww, IMM[0].xxxx >115: MUL TEMP[13].x, IN[5].xxxx, TEMP[12].yyyy >116: MOV TEMP[6].y, TEMP[13].xxxx >117: UMUL TEMP[13].x, TEMP[5].yyyy, IMM[3].yyyy >118: USHR TEMP[14].x, TEMP[13].xxxx, IMM[2].zzzz >119: UARL ADDR[0].x, TEMP[14].xxxx >120: MOV TEMP[13].yz, CONST[2][ADDR[0].x] >121: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[3].yyyy >122: USHR TEMP[15].x, TEMP[14].xxxx, IMM[2].zzzz >123: UARL ADDR[0].x, TEMP[15].xxxx >124: MOV TEMP[14].xw, CONST[2][ADDR[0].x] >125: MUL TEMP[13].xyz, TEMP[13].zzyy, TEMP[14].wxww >126: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[3].yyyy >127: USHR TEMP[15].x, TEMP[14].xxxx, IMM[2].zzzz >128: UARL ADDR[0].x, TEMP[15].xxxx >129: MOV TEMP[14].x, CONST[2][ADDR[0].x] >130: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[3].yyyy >131: USHR TEMP[16].x, TEMP[15].xxxx, IMM[2].zzzz >132: UARL ADDR[0].x, TEMP[16].xxxx >133: MOV TEMP[15].y, CONST[2][ADDR[0].x] >134: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].yyyy, TEMP[13].xxxx >135: MUL TEMP[14].x, TEMP[14].xxxx, IN[5].yyyy >136: MUL TEMP[7].x, IMM[0].wwww, TEMP[14].xxxx >137: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[3].yyyy >138: USHR TEMP[15].x, TEMP[14].xxxx, IMM[2].zzzz >139: UARL ADDR[0].x, TEMP[15].xxxx >140: MOV TEMP[14].xyz, CONST[2][ADDR[0].x] >141: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[3].yyyy >142: USHR TEMP[16].x, TEMP[15].xxxx, IMM[2].zzzz >143: UARL ADDR[0].x, TEMP[16].xxxx >144: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >145: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >146: ADD TEMP[14].xyz, TEMP[14].zzyy, TEMP[14].yxxx >147: FMA TEMP[15].xyz, -TEMP[14].xyzz, IMM[0].wwww, IMM[0].xxxx >148: MUL TEMP[16].x, IN[5].yyyy, TEMP[15].yyyy >149: MOV TEMP[7].y, TEMP[16].xxxx >150: ADD TEMP[6], TEMP[6], TEMP[7] >151: UMUL TEMP[16].x, TEMP[4].zzzz, IMM[3].yyyy >152: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >153: UARL ADDR[0].x, TEMP[17].xxxx >154: MOV TEMP[16].y, CONST[2][ADDR[0].x] >155: MUL TEMP[16].x, IN[5].zzzz, TEMP[16].yyyy >156: MOV TEMP[7].w, TEMP[16].xxxx >157: UMUL TEMP[16].x, TEMP[5].zzzz, IMM[3].yyyy >158: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >159: UARL ADDR[0].x, TEMP[17].xxxx >160: MOV TEMP[16].x, CONST[2][ADDR[0].x] >161: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[3].yyyy >162: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >163: UARL ADDR[0].x, TEMP[18].xxxx >164: MOV TEMP[17].w, CONST[2][ADDR[0].x] >165: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[17].wwww >166: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[3].yyyy >167: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >168: UARL ADDR[0].x, TEMP[18].xxxx >169: MOV TEMP[17].y, 
CONST[2][ADDR[0].x] >170: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[3].yyyy >171: USHR TEMP[19].x, TEMP[18].xxxx, IMM[2].zzzz >172: UARL ADDR[0].x, TEMP[19].xxxx >173: MOV TEMP[18].z, CONST[2][ADDR[0].x] >174: FMA TEMP[17].x, TEMP[17].yyyy, TEMP[18].zzzz, -TEMP[16].xxxx >175: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[3].yyyy >176: USHR TEMP[19].x, TEMP[18].xxxx, IMM[2].zzzz >177: UARL ADDR[0].x, TEMP[19].xxxx >178: MOV TEMP[18].y, CONST[2][ADDR[0].x] >179: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[3].yyyy >180: USHR TEMP[20].x, TEMP[19].xxxx, IMM[2].zzzz >181: UARL ADDR[0].x, TEMP[20].xxxx >182: MOV TEMP[19].z, CONST[2][ADDR[0].x] >183: FMA TEMP[16].x, TEMP[18].yyyy, TEMP[19].zzzz, TEMP[16].xxxx >184: MUL TEMP[16].x, TEMP[16].xxxx, IN[5].zzzz >185: MUL TEMP[16].x, IMM[0].wwww, TEMP[16].xxxx >186: MOV TEMP[16].y, TEMP[16].xxxx >187: MUL TEMP[17].x, TEMP[17].xxxx, IN[5].zzzz >188: MUL TEMP[17].x, IMM[0].wwww, TEMP[17].xxxx >189: MOV TEMP[7].z, TEMP[17].xxxx >190: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[3].yyyy >191: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >192: UARL ADDR[0].x, TEMP[18].xxxx >193: MOV TEMP[17].yz, CONST[2][ADDR[0].x] >194: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[3].yyyy >195: USHR TEMP[19].x, TEMP[18].xxxx, IMM[2].zzzz >196: UARL ADDR[0].x, TEMP[19].xxxx >197: MOV TEMP[18].xw, CONST[2][ADDR[0].x] >198: MUL TEMP[17].xyz, TEMP[17].zzyy, TEMP[18].wxww >199: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[3].yyyy >200: USHR TEMP[19].x, TEMP[18].xxxx, IMM[2].zzzz >201: UARL ADDR[0].x, TEMP[19].xxxx >202: MOV TEMP[18].x, CONST[2][ADDR[0].x] >203: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[3].yyyy >204: USHR TEMP[20].x, TEMP[19].xxxx, IMM[2].zzzz >205: UARL ADDR[0].x, TEMP[20].xxxx >206: MOV TEMP[19].y, CONST[2][ADDR[0].x] >207: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].yyyy, TEMP[17].xxxx >208: MUL TEMP[18].x, TEMP[18].xxxx, IN[5].zzzz >209: MUL TEMP[7].x, IMM[0].wwww, TEMP[18].xxxx >210: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[3].yyyy >211: USHR TEMP[19].x, TEMP[18].xxxx, IMM[2].zzzz >212: UARL ADDR[0].x, TEMP[19].xxxx >213: MOV TEMP[18].xyz, CONST[2][ADDR[0].x] >214: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[3].yyyy >215: USHR TEMP[20].x, TEMP[19].xxxx, IMM[2].zzzz >216: UARL ADDR[0].x, TEMP[20].xxxx >217: MOV TEMP[19].xyz, CONST[2][ADDR[0].x] >218: MUL TEMP[18].xyz, TEMP[18].xyzz, TEMP[19].xyzz >219: ADD TEMP[18].xyz, TEMP[18].zzyy, TEMP[18].yxxx >220: FMA TEMP[19].xyz, -TEMP[18].xyzz, IMM[0].wwww, IMM[0].xxxx >221: MUL TEMP[20].x, IN[5].zzzz, TEMP[19].yyyy >222: MOV TEMP[7].y, TEMP[20].xxxx >223: ADD TEMP[6], TEMP[6], TEMP[7] >224: MOV TEMP[7].xyz, IN[0].xyzx >225: MOV TEMP[7].w, IMM[0].xxxx >226: DP4 TEMP[20].x, TEMP[6], TEMP[7] >227: MOV TEMP[20].y, TEMP[20].xxxx >228: UMUL TEMP[21].x, TEMP[5].xxxx, IMM[3].yyyy >229: USHR TEMP[22].x, TEMP[21].xxxx, IMM[2].zzzz >230: UARL ADDR[0].x, TEMP[22].xxxx >231: MOV TEMP[21].x, CONST[2][ADDR[0].x] >232: UMUL TEMP[22].x, TEMP[5].xxxx, IMM[3].yyyy >233: USHR TEMP[23].x, TEMP[22].xxxx, IMM[2].zzzz >234: UARL ADDR[0].x, TEMP[23].xxxx >235: MOV TEMP[22].z, CONST[2][ADDR[0].x] >236: FMA TEMP[21].x, TEMP[21].xxxx, TEMP[22].zzzz, -TEMP[10].zzzz >237: MUL TEMP[21].x, TEMP[21].xxxx, IN[5].xxxx >238: MUL TEMP[21].x, IMM[0].wwww, TEMP[21].xxxx >239: MUL TEMP[8].x, TEMP[8].xxxx, IMM[0].wwww >240: MOV TEMP[21].y, TEMP[8].xxxx >241: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[3].yyyy >242: USHR TEMP[22].x, TEMP[8].xxxx, IMM[2].zzzz >243: UARL ADDR[0].x, TEMP[22].xxxx >244: MOV TEMP[8].x, CONST[2][ADDR[0].x] >245: UMUL TEMP[22].x, TEMP[5].yyyy, IMM[3].yyyy >246: USHR TEMP[23].x, TEMP[22].xxxx, IMM[2].zzzz >247: UARL 
ADDR[0].x, TEMP[23].xxxx >248: MOV TEMP[22].z, CONST[2][ADDR[0].x] >249: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[22].zzzz, -TEMP[13].zzzz >250: MUL TEMP[8].x, TEMP[8].xxxx, IN[5].yyyy >251: MUL TEMP[9].x, IMM[0].wwww, TEMP[8].xxxx >252: MUL TEMP[8].x, IN[5].xxxx, TEMP[12].zzzz >253: MOV TEMP[21].z, TEMP[8].xxxx >254: MUL TEMP[11].x, IN[5].xxxx, TEMP[12].xxxx >255: MUL TEMP[8].x, IN[5].yyyy, TEMP[15].zzzz >256: MOV TEMP[9].z, TEMP[8].xxxx >257: MUL TEMP[14].x, IN[5].yyyy, TEMP[15].xxxx >258: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[3].yyyy >259: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >260: UARL ADDR[0].x, TEMP[12].xxxx >261: MOV TEMP[8].z, CONST[2][ADDR[0].x] >262: MUL TEMP[8].x, IN[5].xxxx, TEMP[8].zzzz >263: MOV TEMP[21].w, TEMP[8].xxxx >264: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[3].yyyy >265: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >266: UARL ADDR[0].x, TEMP[12].xxxx >267: MOV TEMP[8].z, CONST[2][ADDR[0].x] >268: MUL TEMP[8].x, IN[5].yyyy, TEMP[8].zzzz >269: MOV TEMP[9].w, TEMP[8].xxxx >270: ADD TEMP[9], TEMP[9], TEMP[21] >271: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[3].yyyy >272: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[8].x, CONST[2][ADDR[0].x] >275: UMUL TEMP[12].x, TEMP[5].zzzz, IMM[3].yyyy >276: USHR TEMP[15].x, TEMP[12].xxxx, IMM[2].zzzz >277: UARL ADDR[0].x, TEMP[15].xxxx >278: MOV TEMP[12].z, CONST[2][ADDR[0].x] >279: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].zzzz, -TEMP[17].zzzz >280: MUL TEMP[8].x, TEMP[8].xxxx, IN[5].zzzz >281: MUL TEMP[16].x, IMM[0].wwww, TEMP[8].xxxx >282: MUL TEMP[8].x, IN[5].zzzz, TEMP[19].zzzz >283: MOV TEMP[16].z, TEMP[8].xxxx >284: MUL TEMP[18].x, IN[5].zzzz, TEMP[19].xxxx >285: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[3].yyyy >286: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >287: UARL ADDR[0].x, TEMP[12].xxxx >288: MOV TEMP[8].z, CONST[2][ADDR[0].x] >289: MUL TEMP[8].x, IN[5].zzzz, TEMP[8].zzzz >290: MOV TEMP[16].w, TEMP[8].xxxx >291: ADD TEMP[9], TEMP[9], TEMP[16] >292: DP4 TEMP[8].x, TEMP[9], TEMP[7] >293: MOV TEMP[20].z, TEMP[8].xxxx >294: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[3].yyyy >295: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >296: UARL ADDR[0].x, TEMP[12].xxxx >297: MOV TEMP[8].x, CONST[2][ADDR[0].x] >298: MUL TEMP[8].x, IN[5].xxxx, TEMP[8].xxxx >299: MOV TEMP[11].w, TEMP[8].xxxx >300: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[3].yyyy >301: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >302: UARL ADDR[0].x, TEMP[12].xxxx >303: MOV TEMP[8].x, CONST[2][ADDR[0].x] >304: MUL TEMP[8].x, IN[5].yyyy, TEMP[8].xxxx >305: MOV TEMP[14].w, TEMP[8].xxxx >306: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[3].yyyy >307: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >308: UARL ADDR[0].x, TEMP[12].xxxx >309: MOV TEMP[8].x, CONST[2][ADDR[0].x] >310: MUL TEMP[8].x, IN[5].zzzz, TEMP[8].xxxx >311: MOV TEMP[18].w, TEMP[8].xxxx >312: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[3].yyyy >313: USHR TEMP[12].x, TEMP[8].xxxx, IMM[2].zzzz >314: UARL ADDR[0].x, TEMP[12].xxxx >315: MOV TEMP[8].x, CONST[2][ADDR[0].x] >316: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[3].yyyy >317: USHR TEMP[15].x, TEMP[12].xxxx, IMM[2].zzzz >318: UARL ADDR[0].x, TEMP[15].xxxx >319: MOV TEMP[12].y, CONST[2][ADDR[0].x] >320: FMA TEMP[0].x, TEMP[8].xxxx, TEMP[12].yyyy, -TEMP[10].xxxx >321: ADD TEMP[8].x, TEMP[10].zzzz, TEMP[10].yyyy >322: MOV TEMP[0].w, TEMP[8].xxxx >323: MUL TEMP[8].xy, TEMP[0].xwww, IN[5].xxxx >324: MUL TEMP[8].xy, IMM[0].wwww, TEMP[8].xyyy >325: MOV TEMP[11].yz, TEMP[8].yxyy >326: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[3].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[2].zzzz >328: UARL ADDR[0].x, 
TEMP[10].xxxx >329: MOV TEMP[8].x, CONST[2][ADDR[0].x] >330: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[3].yyyy >331: USHR TEMP[12].x, TEMP[10].xxxx, IMM[2].zzzz >332: UARL ADDR[0].x, TEMP[12].xxxx >333: MOV TEMP[10].y, CONST[2][ADDR[0].x] >334: FMA TEMP[0].x, TEMP[8].xxxx, TEMP[10].yyyy, -TEMP[13].xxxx >335: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[3].yyyy >336: USHR TEMP[10].x, TEMP[8].xxxx, IMM[2].zzzz >337: UARL ADDR[0].x, TEMP[10].xxxx >338: MOV TEMP[8].x, CONST[2][ADDR[0].x] >339: UMUL TEMP[5].x, TEMP[5].zzzz, IMM[3].yyyy >340: USHR TEMP[10].x, TEMP[5].xxxx, IMM[2].zzzz >341: UARL ADDR[0].x, TEMP[10].xxxx >342: MOV TEMP[5].y, CONST[2][ADDR[0].x] >343: FMA TEMP[5].x, TEMP[8].xxxx, TEMP[5].yyyy, -TEMP[17].xxxx >344: MOV TEMP[0].y, TEMP[5].xxxx >345: ADD TEMP[5].x, TEMP[17].zzzz, TEMP[17].yyyy >346: MOV TEMP[0].z, TEMP[5].xxxx >347: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[5].yzzz >348: MUL TEMP[5].xy, IMM[0].wwww, TEMP[0].yzzz >349: MOV TEMP[18].yz, TEMP[5].yxyy >350: ADD TEMP[5].x, TEMP[13].zzzz, TEMP[13].yyyy >351: MUL TEMP[5].x, TEMP[5].xxxx, IN[5].yyyy >352: MOV TEMP[0].y, TEMP[5].xxxx >353: MUL TEMP[5].xy, IMM[0].wwww, TEMP[0].xyyy >354: MOV TEMP[14].yz, TEMP[5].yxyy >355: ADD TEMP[0], TEMP[11], TEMP[14] >356: ADD TEMP[0], TEMP[18], TEMP[0] >357: DP4 TEMP[20].x, TEMP[0], TEMP[7] >358: MOV TEMP[20].w, IMM[0].xxxx >359: DP4 TEMP[5].x, CONST[1][15], TEMP[20] >360: DP4 TEMP[7].x, CONST[1][16], TEMP[20] >361: MOV TEMP[5].y, TEMP[7].xxxx >362: DP4 TEMP[7].x, CONST[1][17], TEMP[20] >363: MOV TEMP[5].z, TEMP[7].xxxx >364: DP3 TEMP[7].x, TEMP[6].xyzz, IN[3].xyzz >365: MOV TEMP[3].y, TEMP[7].xxxx >366: DP3 TEMP[7].x, TEMP[9].xyzz, IN[3].xyzz >367: MOV TEMP[3].z, TEMP[7].xxxx >368: DP3 TEMP[3].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[3].x, TEMP[3].xyzz, CONST[1][16].xyzz >370: DP3 TEMP[7].x, TEMP[6].xyzz, IN[4].xyzz >371: MOV TEMP[4].y, TEMP[7].xxxx >372: DP3 TEMP[7].x, TEMP[6].xyzz, IN[2].xyzz >373: MOV TEMP[6].y, TEMP[7].xxxx >374: DP3 TEMP[7].x, TEMP[9].xyzz, IN[4].xyzz >375: MOV TEMP[4].z, TEMP[7].xxxx >376: DP3 TEMP[7].x, TEMP[9].xyzz, IN[2].xyzz >377: MOV TEMP[6].z, TEMP[7].xxxx >378: DP3 TEMP[4].x, TEMP[0].xyzz, IN[4].xyzz >379: DP3 TEMP[6].x, TEMP[0].xyzz, IN[2].xyzz >380: DP3 TEMP[6].x, TEMP[6].xyzz, CONST[1][16].xyzz >381: MOV TEMP[3].z, TEMP[6].xxxx >382: DP3 TEMP[4].x, TEMP[4].xyzz, CONST[1][16].xyzz >383: MOV TEMP[3].y, TEMP[4].xxxx >384: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz >385: RSQ TEMP[0].x, TEMP[0].xxxx >386: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3].xyzz >387: MOV OUT[3], TEMP[0] >388: MOV OUT[2], TEMP[5] >389: MOV OUT[1], TEMP[2] >390: MOV OUT[0], TEMP[1] >391: END >radeonsi: Compiling shader 42 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !0 > %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240) > %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244) > %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248) > %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252) > %26 = call float 
@llvm.SI.load.const(<16 x i8> %21, i32 256) > %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260) > %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 264) > %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 268) > %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 272) > %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 276) > %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 280) > %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 284) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %13) > %39 = extractelement <4 x float> %38, i32 0 > %40 = extractelement <4 x float> %38, i32 1 > %41 = extractelement <4 x float> %38, i32 2 > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %14) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %15) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %16) > %56 = extractelement <4 x float> %55, i32 0 > %57 = extractelement <4 x float> %55, i32 1 > %58 = extractelement <4 x float> %55, i32 2 > %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 > %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %17) > %62 = extractelement <4 x float> %61, i32 0 > %63 = extractelement <4 x float> %61, i32 1 > %64 = extractelement <4 x float> %61, i32 2 > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %18) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %19) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = call float @llvm.fma.f32(float %45, float 1.000000e+00, float 0.000000e+00) > %78 = call float @llvm.fma.f32(float %46, float -1.000000e+00, float 1.000000e+00) > %79 = call float @llvm.fma.f32(float %77, float 2.000000e+00, float -1.000000e+00) > %80 = call float @llvm.fma.f32(float %78, float 
2.000000e+00, float -1.000000e+00) > %81 = fmul float %76, 0x406FE01000000000 > %82 = fmul float %75, 0x406FE01000000000 > %83 = fmul float %74, 0x406FE01000000000 > %84 = fptosi float %81 to i32 > %85 = fptosi float %82 to i32 > %86 = fptosi float %83 to i32 > %87 = shl i32 %84, 1 > %88 = or i32 %87, 1 > %89 = shl i32 %85, 1 > %90 = or i32 %89, 1 > %91 = shl i32 %86, 1 > %92 = or i32 %91, 1 > %93 = shl i32 %84, 5 > %94 = or i32 %93, 4 > %95 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %94) > %96 = fmul float %68, %95 > %97 = shl i32 %85, 5 > %98 = or i32 %97, 4 > %99 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %98) > %100 = fmul float %69, %99 > %101 = shl i32 %88, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %101) > %103 = shl i32 %88, 4 > %104 = or i32 %103, 12 > %105 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %104) > %106 = fmul float %102, %105 > %107 = shl i32 %88, 4 > %108 = or i32 %107, 4 > %109 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %108) > %110 = shl i32 %88, 4 > %111 = or i32 %110, 8 > %112 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %111) > %113 = fsub float -0.000000e+00, %106 > %114 = call float @llvm.fma.f32(float %109, float %112, float %113) > %115 = shl i32 %88, 4 > %116 = or i32 %115, 4 > %117 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %116) > %118 = shl i32 %88, 4 > %119 = or i32 %118, 8 > %120 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %119) > %121 = call float @llvm.fma.f32(float %117, float %120, float %106) > %122 = fmul float %121, %68 > %123 = fmul float %114, %68 > %124 = fmul float %123, 2.000000e+00 > %125 = shl i32 %90, 4 > %126 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %125) > %127 = shl i32 %90, 4 > %128 = or i32 %127, 12 > %129 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %128) > %130 = fmul float %126, %129 > %131 = shl i32 %90, 4 > %132 = or i32 %131, 4 > %133 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %132) > %134 = shl i32 %90, 4 > %135 = or i32 %134, 8 > %136 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %135) > %137 = fsub float -0.000000e+00, %130 > %138 = call float @llvm.fma.f32(float %133, float %136, float %137) > %139 = shl i32 %90, 4 > %140 = or i32 %139, 4 > %141 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %140) > %142 = shl i32 %90, 4 > %143 = or i32 %142, 8 > %144 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %143) > %145 = call float @llvm.fma.f32(float %141, float %144, float %130) > %146 = fmul float %145, %69 > %147 = fmul float %146, 2.000000e+00 > %148 = fmul float %138, %69 > %149 = fmul float %148, 2.000000e+00 > %150 = shl i32 %88, 4 > %151 = or i32 %150, 4 > %152 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %151) > %153 = shl i32 %88, 4 > %154 = or i32 %153, 8 > %155 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %154) > %156 = shl i32 %88, 4 > %157 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %156) > %158 = shl i32 %88, 4 > %159 = or i32 %158, 12 > %160 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %159) > %161 = fmul float %155, %160 > %162 = fmul float %155, %157 > %163 = fmul float %152, %160 > %164 = shl i32 %88, 4 > %165 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %164) > %166 = shl i32 %88, 4 > %167 = or i32 %166, 4 > %168 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %167) > %169 = call float @llvm.fma.f32(float %165, float %168, float %161) > %170 = fmul float %169, %68 > %171 = fmul float %170, 2.000000e+00 > %172 = shl i32 %88, 4 > %173 = call float 
@llvm.SI.load.const(<16 x i8> %35, i32 %172) > %174 = shl i32 %88, 4 > %175 = or i32 %174, 4 > %176 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %175) > %177 = shl i32 %88, 4 > %178 = or i32 %177, 8 > %179 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %178) > %180 = shl i32 %88, 4 > %181 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %180) > %182 = shl i32 %88, 4 > %183 = or i32 %182, 4 > %184 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %183) > %185 = shl i32 %88, 4 > %186 = or i32 %185, 8 > %187 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %186) > %188 = fmul float %173, %181 > %189 = fmul float %176, %184 > %190 = fmul float %179, %187 > %191 = fadd float %190, %189 > %192 = fadd float %190, %188 > %193 = fadd float %189, %188 > %194 = fsub float -0.000000e+00, %191 > %195 = call float @llvm.fma.f32(float %194, float 2.000000e+00, float 1.000000e+00) > %196 = fsub float -0.000000e+00, %192 > %197 = call float @llvm.fma.f32(float %196, float 2.000000e+00, float 1.000000e+00) > %198 = fsub float -0.000000e+00, %193 > %199 = call float @llvm.fma.f32(float %198, float 2.000000e+00, float 1.000000e+00) > %200 = fmul float %68, %197 > %201 = shl i32 %90, 4 > %202 = or i32 %201, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %202) > %204 = shl i32 %90, 4 > %205 = or i32 %204, 8 > %206 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %205) > %207 = shl i32 %90, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %207) > %209 = shl i32 %90, 4 > %210 = or i32 %209, 12 > %211 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %210) > %212 = fmul float %206, %211 > %213 = fmul float %206, %208 > %214 = fmul float %203, %211 > %215 = shl i32 %90, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %215) > %217 = shl i32 %90, 4 > %218 = or i32 %217, 4 > %219 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %218) > %220 = call float @llvm.fma.f32(float %216, float %219, float %212) > %221 = fmul float %220, %69 > %222 = fmul float %221, 2.000000e+00 > %223 = shl i32 %90, 4 > %224 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %223) > %225 = shl i32 %90, 4 > %226 = or i32 %225, 4 > %227 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %226) > %228 = shl i32 %90, 4 > %229 = or i32 %228, 8 > %230 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %229) > %231 = shl i32 %90, 4 > %232 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %231) > %233 = shl i32 %90, 4 > %234 = or i32 %233, 4 > %235 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %234) > %236 = shl i32 %90, 4 > %237 = or i32 %236, 8 > %238 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %237) > %239 = fmul float %224, %232 > %240 = fmul float %227, %235 > %241 = fmul float %230, %238 > %242 = fadd float %241, %240 > %243 = fadd float %241, %239 > %244 = fadd float %240, %239 > %245 = fsub float -0.000000e+00, %242 > %246 = call float @llvm.fma.f32(float %245, float 2.000000e+00, float 1.000000e+00) > %247 = fsub float -0.000000e+00, %243 > %248 = call float @llvm.fma.f32(float %247, float 2.000000e+00, float 1.000000e+00) > %249 = fsub float -0.000000e+00, %244 > %250 = call float @llvm.fma.f32(float %249, float 2.000000e+00, float 1.000000e+00) > %251 = fmul float %69, %248 > %252 = fadd float %171, %222 > %253 = fadd float %200, %251 > %254 = fadd float %124, %149 > %255 = fadd float %96, %100 > %256 = shl i32 %86, 5 > %257 = or i32 %256, 4 > %258 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %257) > %259 = fmul float %70, %258 > %260 = shl i32 %92, 
4 > %261 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %260) > %262 = shl i32 %92, 4 > %263 = or i32 %262, 12 > %264 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %263) > %265 = fmul float %261, %264 > %266 = shl i32 %92, 4 > %267 = or i32 %266, 4 > %268 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %267) > %269 = shl i32 %92, 4 > %270 = or i32 %269, 8 > %271 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %270) > %272 = fsub float -0.000000e+00, %265 > %273 = call float @llvm.fma.f32(float %268, float %271, float %272) > %274 = shl i32 %92, 4 > %275 = or i32 %274, 4 > %276 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %275) > %277 = shl i32 %92, 4 > %278 = or i32 %277, 8 > %279 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %278) > %280 = call float @llvm.fma.f32(float %276, float %279, float %265) > %281 = fmul float %280, %70 > %282 = fmul float %281, 2.000000e+00 > %283 = fmul float %273, %70 > %284 = fmul float %283, 2.000000e+00 > %285 = shl i32 %92, 4 > %286 = or i32 %285, 4 > %287 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %286) > %288 = shl i32 %92, 4 > %289 = or i32 %288, 8 > %290 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %289) > %291 = shl i32 %92, 4 > %292 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %291) > %293 = shl i32 %92, 4 > %294 = or i32 %293, 12 > %295 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %294) > %296 = fmul float %290, %295 > %297 = fmul float %290, %292 > %298 = fmul float %287, %295 > %299 = shl i32 %92, 4 > %300 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %299) > %301 = shl i32 %92, 4 > %302 = or i32 %301, 4 > %303 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %302) > %304 = call float @llvm.fma.f32(float %300, float %303, float %296) > %305 = fmul float %304, %70 > %306 = fmul float %305, 2.000000e+00 > %307 = shl i32 %92, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %307) > %309 = shl i32 %92, 4 > %310 = or i32 %309, 4 > %311 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %310) > %312 = shl i32 %92, 4 > %313 = or i32 %312, 8 > %314 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %313) > %315 = shl i32 %92, 4 > %316 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %315) > %317 = shl i32 %92, 4 > %318 = or i32 %317, 4 > %319 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %318) > %320 = shl i32 %92, 4 > %321 = or i32 %320, 8 > %322 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %321) > %323 = fmul float %308, %316 > %324 = fmul float %311, %319 > %325 = fmul float %314, %322 > %326 = fadd float %325, %324 > %327 = fadd float %325, %323 > %328 = fadd float %324, %323 > %329 = fsub float -0.000000e+00, %326 > %330 = call float @llvm.fma.f32(float %329, float 2.000000e+00, float 1.000000e+00) > %331 = fsub float -0.000000e+00, %327 > %332 = call float @llvm.fma.f32(float %331, float 2.000000e+00, float 1.000000e+00) > %333 = fsub float -0.000000e+00, %328 > %334 = call float @llvm.fma.f32(float %333, float 2.000000e+00, float 1.000000e+00) > %335 = fmul float %70, %332 > %336 = fadd float %252, %306 > %337 = fadd float %253, %335 > %338 = fadd float %254, %284 > %339 = fadd float %255, %259 > %340 = fmul float %336, %39 > %341 = fmul float %337, %40 > %342 = fadd float %340, %341 > %343 = fmul float %338, %41 > %344 = fadd float %342, %343 > %345 = fadd float %344, %339 > %346 = shl i32 %88, 4 > %347 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %346) > %348 = shl i32 %88, 4 > %349 = or i32 %348, 8 > %350 = call float 
@llvm.SI.load.const(<16 x i8> %35, i32 %349) > %351 = fsub float -0.000000e+00, %163 > %352 = call float @llvm.fma.f32(float %347, float %350, float %351) > %353 = fmul float %352, %68 > %354 = fmul float %353, 2.000000e+00 > %355 = fmul float %122, 2.000000e+00 > %356 = shl i32 %90, 4 > %357 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %356) > %358 = shl i32 %90, 4 > %359 = or i32 %358, 8 > %360 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %359) > %361 = fsub float -0.000000e+00, %214 > %362 = call float @llvm.fma.f32(float %357, float %360, float %361) > %363 = fmul float %362, %69 > %364 = fmul float %363, 2.000000e+00 > %365 = fmul float %68, %199 > %366 = fmul float %68, %195 > %367 = fmul float %69, %250 > %368 = fmul float %69, %246 > %369 = shl i32 %84, 5 > %370 = or i32 %369, 8 > %371 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %370) > %372 = fmul float %68, %371 > %373 = shl i32 %85, 5 > %374 = or i32 %373, 8 > %375 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %374) > %376 = fmul float %69, %375 > %377 = fadd float %364, %354 > %378 = fadd float %147, %355 > %379 = fadd float %367, %365 > %380 = fadd float %376, %372 > %381 = shl i32 %92, 4 > %382 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %381) > %383 = shl i32 %92, 4 > %384 = or i32 %383, 8 > %385 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %384) > %386 = fsub float -0.000000e+00, %298 > %387 = call float @llvm.fma.f32(float %382, float %385, float %386) > %388 = fmul float %387, %70 > %389 = fmul float %388, 2.000000e+00 > %390 = fmul float %70, %334 > %391 = fmul float %70, %330 > %392 = shl i32 %86, 5 > %393 = or i32 %392, 8 > %394 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %393) > %395 = fmul float %70, %394 > %396 = fadd float %377, %389 > %397 = fadd float %378, %282 > %398 = fadd float %379, %390 > %399 = fadd float %380, %395 > %400 = fmul float %396, %39 > %401 = fmul float %397, %40 > %402 = fadd float %400, %401 > %403 = fmul float %398, %41 > %404 = fadd float %402, %403 > %405 = fadd float %404, %399 > %406 = shl i32 %84, 5 > %407 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %406) > %408 = fmul float %68, %407 > %409 = shl i32 %85, 5 > %410 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %409) > %411 = fmul float %69, %410 > %412 = shl i32 %86, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %412) > %414 = fmul float %70, %413 > %415 = shl i32 %88, 4 > %416 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %415) > %417 = shl i32 %88, 4 > %418 = or i32 %417, 4 > %419 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %418) > %420 = fsub float -0.000000e+00, %161 > %421 = call float @llvm.fma.f32(float %416, float %419, float %420) > %422 = fadd float %163, %162 > %423 = fmul float %421, %68 > %424 = fmul float %422, %68 > %425 = fmul float %423, 2.000000e+00 > %426 = fmul float %424, 2.000000e+00 > %427 = shl i32 %90, 4 > %428 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %427) > %429 = shl i32 %90, 4 > %430 = or i32 %429, 4 > %431 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %430) > %432 = fsub float -0.000000e+00, %212 > %433 = call float @llvm.fma.f32(float %428, float %431, float %432) > %434 = shl i32 %92, 4 > %435 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %434) > %436 = shl i32 %92, 4 > %437 = or i32 %436, 4 > %438 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %437) > %439 = fsub float -0.000000e+00, %296 > %440 = call float @llvm.fma.f32(float %435, float %438, float %439) > %441 = fadd float %298, 
%297 > %442 = fmul float %433, %69 > %443 = fmul float %440, %70 > %444 = fmul float %441, %70 > %445 = fmul float %443, 2.000000e+00 > %446 = fmul float %444, 2.000000e+00 > %447 = fadd float %214, %213 > %448 = fmul float %447, %69 > %449 = fmul float %442, 2.000000e+00 > %450 = fmul float %448, 2.000000e+00 > %451 = fadd float %366, %368 > %452 = fadd float %425, %449 > %453 = fadd float %426, %450 > %454 = fadd float %408, %411 > %455 = fadd float %391, %451 > %456 = fadd float %445, %452 > %457 = fadd float %446, %453 > %458 = fadd float %414, %454 > %459 = fmul float %455, %39 > %460 = fmul float %456, %40 > %461 = fadd float %459, %460 > %462 = fmul float %457, %41 > %463 = fadd float %461, %462 > %464 = fadd float %463, %458 > %465 = fmul float %22, %464 > %466 = fmul float %23, %345 > %467 = fadd float %465, %466 > %468 = fmul float %24, %405 > %469 = fadd float %467, %468 > %470 = fadd float %469, %25 > %471 = fmul float %26, %464 > %472 = fmul float %27, %345 > %473 = fadd float %471, %472 > %474 = fmul float %28, %405 > %475 = fadd float %473, %474 > %476 = fadd float %475, %29 > %477 = fmul float %30, %464 > %478 = fmul float %31, %345 > %479 = fadd float %477, %478 > %480 = fmul float %32, %405 > %481 = fadd float %479, %480 > %482 = fadd float %481, %33 > %483 = fmul float %336, %56 > %484 = fmul float %337, %57 > %485 = fadd float %484, %483 > %486 = fmul float %338, %58 > %487 = fadd float %485, %486 > %488 = fmul float %396, %56 > %489 = fmul float %397, %57 > %490 = fadd float %489, %488 > %491 = fmul float %398, %58 > %492 = fadd float %490, %491 > %493 = fmul float %455, %56 > %494 = fmul float %456, %57 > %495 = fadd float %494, %493 > %496 = fmul float %457, %58 > %497 = fadd float %495, %496 > %498 = fmul float %497, %26 > %499 = fmul float %487, %27 > %500 = fadd float %499, %498 > %501 = fmul float %492, %28 > %502 = fadd float %500, %501 > %503 = fmul float %336, %62 > %504 = fmul float %337, %63 > %505 = fadd float %504, %503 > %506 = fmul float %338, %64 > %507 = fadd float %505, %506 > %508 = fmul float %336, %50 > %509 = fmul float %337, %51 > %510 = fadd float %509, %508 > %511 = fmul float %338, %52 > %512 = fadd float %510, %511 > %513 = fmul float %396, %62 > %514 = fmul float %397, %63 > %515 = fadd float %514, %513 > %516 = fmul float %398, %64 > %517 = fadd float %515, %516 > %518 = fmul float %396, %50 > %519 = fmul float %397, %51 > %520 = fadd float %519, %518 > %521 = fmul float %398, %52 > %522 = fadd float %520, %521 > %523 = fmul float %455, %62 > %524 = fmul float %456, %63 > %525 = fadd float %524, %523 > %526 = fmul float %457, %64 > %527 = fadd float %525, %526 > %528 = fmul float %455, %50 > %529 = fmul float %456, %51 > %530 = fadd float %529, %528 > %531 = fmul float %457, %52 > %532 = fadd float %530, %531 > %533 = fmul float %532, %26 > %534 = fmul float %512, %27 > %535 = fadd float %534, %533 > %536 = fmul float %522, %28 > %537 = fadd float %535, %536 > %538 = fmul float %527, %26 > %539 = fmul float %507, %27 > %540 = fadd float %539, %538 > %541 = fmul float %517, %28 > %542 = fadd float %540, %541 > %543 = fmul float %502, %502 > %544 = fmul float %542, %542 > %545 = fadd float %544, %543 > %546 = fmul float %537, %537 > %547 = fadd float %545, %546 > %548 = call float @llvm.AMDGPU.rsq.clamped.f32(float %547) > %549 = fmul float %548, %502 > %550 = fmul float %548, %542 > %551 = fmul float %548, %537 > %552 = bitcast i32 %11 to float > %553 = insertvalue <{ float, float, float }> undef, float %552, 2 > call void 
@llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %470, float %476, float %482, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %549, float %550, float %551, float %458) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %80, float 1.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %553 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[2] >DCL CONST[1][0..14] >DCL TEMP[0..7], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[1] FLT32 {158456325028528675187087900672.0000, 0.9961, 0.0039, 0.5000} >IMM[2] UINT32 {0, 208, 224, 0} >IMM[3] FLT32 { -10.0989, 255.9961, 256.0000, 0.0039} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 {65535.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[2].xyzz, IN[2].xyzz > 9: RSQ TEMP[1].x, TEMP[1].xxxx > 10: FMA TEMP[1].xyz, IN[2].xyzz, TEMP[1].xxxx, TEMP[0].xyzz > 11: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz > 12: RSQ TEMP[2].x, TEMP[2].xxxx > 13: MUL TEMP[0].xy, TEMP[2].xxxx, TEMP[1].yxxx > 14: DDX TEMP[1].xyz, IN[1].xyzz > 15: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz > 16: MUL TEMP[3], CONST[2].xxxx, IN[1].xyzz > 17: DDY TEMP[1].xyz, TEMP[3] > 18: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz > 19: SQRT TEMP[2].x, TEMP[2].xxxx > 20: SQRT TEMP[2].y, TEMP[3].xxxx > 21: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].xxxx > 22: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].wwww > 23: SSG TEMP[4].xy, TEMP[0].xyyy > 24: MUL TEMP[4].xy, IMM[1].xxxx, TEMP[4].xyyy > 25: RCP TEMP[2].xy, TEMP[2].xxxx > 26: MUL TEMP[2].xy, TEMP[0].xyyy, TEMP[2].xyyy > 27: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 28: MUL TEMP[3].xy, CONST[1][13].wwww, CONST[1][14].yxxx > 29: FMA TEMP[1].xy, -TEMP[2].xyyy, TEMP[3].xyyy, IN[0].xyyy > 30: FMA TEMP[2], TEMP[2].yxyx, TEMP[3].yxyx, IN[0].xyyx > 31: MOV TEMP[1].zw, TEMP[2].wwzw > 32: MOV TEMP[2].xy, TEMP[2].xyyy > 33: TEX TEMP[2].xzw, TEMP[2], SAMP[1], 2D > 34: MOV TEMP[0].x, TEMP[2].xzwx > 35: MOV TEMP[3].xy, TEMP[1].xzzz > 36: TEX TEMP[3].xzw, TEMP[3], SAMP[1], 2D 
> 37: MOV TEMP[4].x, TEMP[3].xxzw > 38: MOV TEMP[5].xy, TEMP[1].wyyy > 39: TEX TEMP[5].xzw, TEMP[5], SAMP[1], 2D > 40: MOV TEMP[1].x, TEMP[5].xxzw > 41: DP2 TEMP[3].x, TEMP[3].zwww, IMM[1].yzzz > 42: MOV TEMP[4].y, TEMP[3].xxxx > 43: DP2 TEMP[3].x, TEMP[5].zwww, IMM[1].yzzz > 44: MOV TEMP[1].y, TEMP[3].xxxx > 45: MOV TEMP[3].xy, IN[0].xyyy > 46: TEX TEMP[3], TEMP[3], SAMP[1], 2D > 47: MOV TEMP[5].xy, TEMP[3].xyxw > 48: DP2 TEMP[3].x, TEMP[3].zwww, IMM[1].yzzz > 49: MOV TEMP[5].z, TEMP[3].xxxx > 50: MAX TEMP[1].xy, TEMP[1].xyyy, TEMP[5].xzzz > 51: FMA TEMP[3].xy, -CONST[1][13].xxxx, IMM[1].wwww, TEMP[5].xzzz > 52: MAX TEMP[4].xy, TEMP[1].xyyy, TEMP[4].xyyy > 53: ADD TEMP[6].x, -TEMP[4].yyyy, IMM[0].zzzz > 54: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 56: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 57: MUL TEMP[6].x, TEMP[6].xxxx, IMM[3].xxxx > 58: EX2 TEMP[6].x, TEMP[6].xxxx > 59: ADD TEMP[7].x, TEMP[6].xxxx, TEMP[4].xxxx > 60: FMA TEMP[4].x, -TEMP[4].xxxx, TEMP[6].xxxx, TEMP[7].xxxx > 61: DP2 TEMP[6].x, TEMP[2].zwww, IMM[1].yzzz > 62: MOV TEMP[0].y, TEMP[6].xxxx > 63: ADD TEMP[6].x, -TEMP[6].xxxx, IMM[0].zzzz > 64: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 65: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 66: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx > 67: MUL TEMP[6].x, TEMP[6].xxxx, IMM[3].xxxx > 68: EX2 TEMP[6].x, TEMP[6].xxxx > 69: ADD TEMP[1].x, TEMP[6].xxxx, TEMP[2].xxxx > 70: FMA TEMP[2].x, -TEMP[2].xxxx, TEMP[6].xxxx, TEMP[1].xxxx > 71: FSLT TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx > 72: AND TEMP[2].x, TEMP[2].xxxx, IMM[4].xxxx > 73: INEG TEMP[2].x, TEMP[2].xxxx > 74: USNE TEMP[4].x, TEMP[2].xxxx, IMM[2].xxxx > 75: UIF TEMP[4].xxxx :0 > 76: MOV TEMP[4].x, TEMP[0].xxxx > 77: ELSE :0 > 78: MOV TEMP[4].x, TEMP[3].xxxx > 79: ENDIF > 80: MOV TEMP[4].x, TEMP[4].xxxx > 81: USNE TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 82: UIF TEMP[2].xxxx :0 > 83: MOV TEMP[0].x, TEMP[0].yyyy > 84: ELSE :0 > 85: MOV TEMP[0].x, TEMP[3].yyyy > 86: ENDIF > 87: MOV TEMP[4].y, TEMP[0].xxxx > 88: MOV TEMP[5].xz, TEMP[4].xxyx > 89: ADD TEMP[0].xyz, TEMP[5].xyzz, -CONST[1][13].xxxx > 90: MOV_SAT TEMP[0].xyz, TEMP[0].xyzz > 91: MUL TEMP[2].x, TEMP[0].zzzz, IMM[3].yyyy > 92: FLR TEMP[2].x, TEMP[2].xxxx > 93: MUL TEMP[1].xy, TEMP[2].xxxx, IMM[3].zwww > 94: FMA TEMP[2].x, TEMP[0].zzzz, IMM[5].xxxx, -TEMP[1].xxxx > 95: MOV TEMP[0].xy, TEMP[0].xyxx > 96: MOV TEMP[0].z, TEMP[1].yyyy > 97: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].wwww > 98: MOV TEMP[0].w, TEMP[1].xxxx > 99: MOV OUT[0], TEMP[0] >100: END >radeonsi: Compiling shader 43 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@ddxy_lds = external addrspace(3) global [64 x i32] > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) > %26 = getelementptr [16 x <16 x i8>], 
[16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 208) > %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 220) > %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 224) > %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 228) > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 3 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 > %43 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %44 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %43, i64 0, i64 7 > %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0 > %46 = extractelement <8 x i32> %42, i32 7 > %47 = extractelement <4 x i32> %45, i32 0 > %48 = and i32 %47, %46 > %49 = insertelement <4 x i32> %45, i32 %48, i32 0 > %50 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %58 = bitcast float %50 to i32 > %59 = bitcast float %51 to i32 > %60 = insertelement <2 x i32> undef, i32 %58, i32 0 > %61 = insertelement <2 x i32> %60, i32 %59, i32 1 > %62 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %61, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %63 = extractelement <4 x float> %62, i32 1 > %64 = extractelement <4 x float> %62, i32 3 > %65 = call float @llvm.fma.f32(float %63, float 2.000000e+00, float -1.000000e+00) > %66 = call float @llvm.fma.f32(float %64, float 2.000000e+00, float -1.000000e+00) > %67 = fsub float -0.000000e+00, %65 > %68 = call float @llvm.fma.f32(float %67, float %65, float 1.000000e+00) > %69 = fsub float -0.000000e+00, %66 > %70 = call float @llvm.fma.f32(float %69, float %66, float %68) > %71 = call float @llvm.sqrt.f32(float %70) > %72 = fmul float %55, %55 > %73 = fmul float %56, %56 > %74 = fadd float %73, %72 > %75 = fmul float %57, %57 > %76 = fadd float %74, %75 > %77 = call float @llvm.AMDGPU.rsq.clamped.f32(float %76) > %78 = call float @llvm.fma.f32(float %55, float %77, float %65) > %79 = call float @llvm.fma.f32(float %56, float %77, float %66) > %80 = call float @llvm.fma.f32(float %57, float %77, float %71) > %81 = fmul float %78, %78 > %82 = fmul float %79, %79 > %83 = fadd float %82, %81 > %84 = fmul float %80, %80 > %85 = fadd float %83, %84 > %86 = call float @llvm.AMDGPU.rsq.clamped.f32(float %85) > %87 = 
fmul float %86, %79 > %88 = fmul float %86, %78 > %89 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %90 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %89), !range !1 > %91 = zext i32 %90 to i64 > %92 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %91 > %93 = bitcast i32 addrspace(3)* %92 to float addrspace(3)* > store float %54, float addrspace(3)* %93, align 4 > %94 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %95 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %94), !range !1 > %96 = zext i32 %95 to i64 > %97 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %96 > %98 = bitcast i32 addrspace(3)* %97 to float addrspace(3)* > store float %54, float addrspace(3)* %98, align 4 > %99 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %100 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %99), !range !1 > %101 = zext i32 %100 to i64 > %102 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %101 > %103 = and i32 %100, 60 > %104 = zext i32 %103 to i64 > %105 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %104 > %106 = or i32 %103, 1 > %107 = zext i32 %106 to i64 > %108 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %107 > %109 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > store float %52, float addrspace(3)* %109, align 4 > %110 = bitcast i32 addrspace(3)* %105 to float addrspace(3)* > %111 = load float, float addrspace(3)* %110, align 4 > %112 = bitcast i32 addrspace(3)* %108 to float addrspace(3)* > %113 = load float, float addrspace(3)* %112, align 4 > %114 = fsub float %113, %111 > %115 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > store float %53, float addrspace(3)* %115, align 4 > %116 = bitcast i32 addrspace(3)* %105 to float addrspace(3)* > %117 = load float, float addrspace(3)* %116, align 4 > %118 = bitcast i32 addrspace(3)* %108 to float addrspace(3)* > %119 = load float, float addrspace(3)* %118, align 4 > %120 = fsub float %119, %117 > %121 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > store float %54, float addrspace(3)* %121, align 4 > %122 = bitcast i32 addrspace(3)* %105 to float addrspace(3)* > %123 = load float, float addrspace(3)* %122, align 4 > %124 = bitcast i32 addrspace(3)* %108 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = fsub float %125, %123 > %127 = fmul float %114, %114 > %128 = fmul float %120, %120 > %129 = fadd float %128, %127 > %130 = fmul float %126, %126 > %131 = fadd float %129, %130 > %132 = fmul float %25, %52 > %133 = fmul float %25, %53 > %134 = fmul float %25, %54 > %135 = fmul float %25, %54 > %136 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %137 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %136), !range !1 > %138 = zext i32 %137 to i64 > %139 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %138 > %140 = bitcast i32 addrspace(3)* %139 to float addrspace(3)* > store float %135, float addrspace(3)* %140, align 4 > %141 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %142 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %141), !range !1 > %143 = zext i32 %142 to i64 > %144 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > store float %135, float addrspace(3)* %145, align 4 > %146 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) > %147 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %146), !range !1 > %148 = zext i32 %147 to i64 
> %149 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %148 > %150 = and i32 %147, 60 > %151 = zext i32 %150 to i64 > %152 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %151 > %153 = or i32 %150, 2 > %154 = zext i32 %153 to i64 > %155 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %154 > %156 = bitcast i32 addrspace(3)* %149 to float addrspace(3)* > store float %132, float addrspace(3)* %156, align 4 > %157 = bitcast i32 addrspace(3)* %152 to float addrspace(3)* > %158 = load float, float addrspace(3)* %157, align 4 > %159 = bitcast i32 addrspace(3)* %155 to float addrspace(3)* > %160 = load float, float addrspace(3)* %159, align 4 > %161 = fsub float %160, %158 > %162 = bitcast i32 addrspace(3)* %149 to float addrspace(3)* > store float %133, float addrspace(3)* %162, align 4 > %163 = bitcast i32 addrspace(3)* %152 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = bitcast i32 addrspace(3)* %155 to float addrspace(3)* > %166 = load float, float addrspace(3)* %165, align 4 > %167 = fsub float %166, %164 > %168 = bitcast i32 addrspace(3)* %149 to float addrspace(3)* > store float %134, float addrspace(3)* %168, align 4 > %169 = bitcast i32 addrspace(3)* %152 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = bitcast i32 addrspace(3)* %155 to float addrspace(3)* > %172 = load float, float addrspace(3)* %171, align 4 > %173 = fsub float %172, %170 > %174 = bitcast i32 addrspace(3)* %149 to float addrspace(3)* > store float %135, float addrspace(3)* %174, align 4 > %175 = fmul float %161, %161 > %176 = fmul float %167, %167 > %177 = fadd float %176, %175 > %178 = fmul float %173, %173 > %179 = fadd float %177, %178 > %180 = call float @llvm.sqrt.f32(float %131) > %181 = call float @llvm.sqrt.f32(float %179) > %182 = fmul float %181, %180 > %183 = fcmp oeq float %182, 0.000000e+00 > %184 = fcmp oeq float %182, 0.000000e+00 > %185 = fcmp ogt float %87, 0.000000e+00 > %186 = select i1 %185, float 1.000000e+00, float %87 > %187 = fcmp oge float %186, 0.000000e+00 > %188 = fcmp ogt float %88, 0.000000e+00 > %189 = select i1 %188, float 1.000000e+00, float %88 > %190 = fcmp oge float %189, 0.000000e+00 > %.op = fmul float %186, 0x4600000000000000 > %191 = select i1 %187, float %.op, float 0xC600000000000000 > %.op35 = fmul float %189, 0x4600000000000000 > %192 = select i1 %190, float %.op35, float 0xC600000000000000 > %193 = fdiv float 1.000000e+00, %182 > %194 = fmul float %87, %193 > %195 = fmul float %88, %193 > %196 = select i1 %183, float %191, float %194 > %197 = select i1 %184, float %192, float %195 > %198 = fmul float %29, %31 > %199 = fmul float %29, %30 > %200 = fsub float -0.000000e+00, %196 > %201 = call float @llvm.fma.f32(float %200, float %198, float %50) > %202 = fsub float -0.000000e+00, %197 > %203 = call float @llvm.fma.f32(float %202, float %199, float %51) > %204 = call float @llvm.fma.f32(float %197, float %199, float %50) > %205 = call float @llvm.fma.f32(float %196, float %198, float %51) > %206 = call float @llvm.fma.f32(float %197, float %199, float %51) > %207 = call float @llvm.fma.f32(float %196, float %198, float %50) > %208 = bitcast float %204 to i32 > %209 = bitcast float %205 to i32 > %210 = insertelement <2 x i32> undef, i32 %208, i32 0 > %211 = insertelement <2 x i32> %210, i32 %209, i32 1 > %212 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %211, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0) > %213 = extractelement <4 x float> %212, i32 0 > %214 = extractelement <4 x float> %212, i32 2 > %215 = extractelement <4 x float> %212, i32 3 > %216 = bitcast float %201 to i32 > %217 = bitcast float %206 to i32 > %218 = insertelement <2 x i32> undef, i32 %216, i32 0 > %219 = insertelement <2 x i32> %218, i32 %217, i32 1 > %220 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %219, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %221 = extractelement <4 x float> %220, i32 0 > %222 = extractelement <4 x float> %220, i32 2 > %223 = extractelement <4 x float> %220, i32 3 > %224 = bitcast float %207 to i32 > %225 = bitcast float %203 to i32 > %226 = insertelement <2 x i32> undef, i32 %224, i32 0 > %227 = insertelement <2 x i32> %226, i32 %225, i32 1 > %228 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %227, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %229 = extractelement <4 x float> %228, i32 0 > %230 = extractelement <4 x float> %228, i32 2 > %231 = extractelement <4 x float> %228, i32 3 > %232 = fmul float %222, 0x3FEFE01FE0000000 > %233 = fmul float %223, 0x3F6FE01FE0000000 > %234 = fadd float %232, %233 > %235 = fmul float %230, 0x3FEFE01FE0000000 > %236 = fmul float %231, 0x3F6FE01FE0000000 > %237 = fadd float %235, %236 > %238 = bitcast float %50 to i32 > %239 = bitcast float %51 to i32 > %240 = insertelement <2 x i32> undef, i32 %238, i32 0 > %241 = insertelement <2 x i32> %240, i32 %239, i32 1 > %242 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %241, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %243 = extractelement <4 x float> %242, i32 0 > %244 = extractelement <4 x float> %242, i32 1 > %245 = extractelement <4 x float> %242, i32 2 > %246 = extractelement <4 x float> %242, i32 3 > %247 = fmul float %245, 0x3FEFE01FE0000000 > %248 = fmul float %246, 0x3F6FE01FE0000000 > %249 = fadd float %247, %248 > %250 = call float @llvm.maxnum.f32(float %229, float %243) > %251 = call float @llvm.maxnum.f32(float %237, float %249) > %252 = fsub float -0.000000e+00, %28 > %253 = call float @llvm.fma.f32(float %252, float 5.000000e-01, float %243) > %254 = fsub float -0.000000e+00, %28 > %255 = call float @llvm.fma.f32(float %254, float 5.000000e-01, float %249) > %256 = call float @llvm.maxnum.f32(float %250, float %221) > %257 = call float @llvm.maxnum.f32(float %251, float %234) > %258 = fsub float 1.000000e+00, %257 > %259 = fmul float %258, %258 > %260 = fmul float %259, %259 > %261 = fmul float %260, %260 > %262 = fmul float %261, 0xC024329E80000000 > %263 = call float @llvm.exp2.f32(float %262) > %264 = fadd float %263, %256 > %265 = fsub float -0.000000e+00, %256 > %266 = call float @llvm.fma.f32(float %265, float %263, float %264) > %267 = fmul float %214, 0x3FEFE01FE0000000 > %268 = fmul float %215, 0x3F6FE01FE0000000 > %269 = fadd float %267, %268 > %270 = fsub float 1.000000e+00, %269 > %271 = fmul float %270, %270 > %272 = fmul float %271, %271 > %273 = fmul float %272, %272 > %274 = fmul float %273, 0xC024329E80000000 > %275 = call float @llvm.exp2.f32(float %274) > %276 = fadd float %275, %213 > %277 = fsub float -0.000000e+00, %213 > %278 = call float @llvm.fma.f32(float %277, float %275, float %276) > %279 = fcmp olt float %266, %278 > %. 
= select i1 %279, float %213, float %253 > %temp.0 = select i1 %279, float %269, float %255 > %280 = fsub float %., %28 > %281 = fsub float %244, %28 > %282 = fsub float %temp.0, %28 > %283 = call float @llvm.AMDGPU.clamp.(float %280, float 0.000000e+00, float 1.000000e+00) > %284 = call float @llvm.AMDGPU.clamp.(float %281, float 0.000000e+00, float 1.000000e+00) > %285 = call float @llvm.AMDGPU.clamp.(float %282, float 0.000000e+00, float 1.000000e+00) > %286 = fmul float %285, 0x406FFFE000000000 > %287 = call float @llvm.floor.f32(float %286) > %288 = fmul float %287, 2.560000e+02 > %289 = fmul float %287, 0x3F70101020000000 > %290 = fsub float -0.000000e+00, %288 > %291 = call float @llvm.fma.f32(float %285, float 6.553500e+04, float %290) > %292 = fmul float %291, 0x3F70101020000000 > %293 = bitcast float %5 to i32 > %294 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %293, 10 > %295 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %294, float %283, 11 > %296 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %295, float %284, 12 > %297 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %296, float %289, 13 > %298 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %297, float %292, 14 > %299 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %298, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %299 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 > >; Function Attrs: nounwind readnone >declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.floor.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} >!1 = !{i32 0, i32 64} > >SHADER KEY > prolog.color_two_side = 0 > 
prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL IN[7], GENERIC[7], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 112, 0, 0} >IMM[1] FLT32 { 2.0000, 0.0625, 255.0000, 1.0000} >IMM[2] FLT32 { 0.0039, 0.0000, 158456325028528675187087900672.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][7].xxxx > 4: MOV TEMP[1].xy, IN[0].xyyy > 5: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 6: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1].xyzz > 7: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 8: MOV TEMP[2].xy, IN[1].xyyy > 9: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 10: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 11: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 12: MOV TEMP[2].xy, IN[1].zwww > 13: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 14: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 15: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 16: MOV TEMP[2].xy, IN[2].xyyy > 17: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 18: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 19: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][7].xxxx > 20: FMA TEMP[0].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[0].xyzz > 21: MOV TEMP[2].xy, IN[3].xyyy > 22: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 23: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 24: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 25: MOV TEMP[2].xy, IN[3].zwww > 26: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 27: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 28: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 29: MOV TEMP[2].xy, IN[4].xyyy > 30: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 31: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 32: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 33: MOV TEMP[2].xy, IN[4].zwww > 34: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 35: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 36: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 37: MOV TEMP[2].xy, IN[5].xyyy > 38: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 39: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 40: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 41: MOV TEMP[2].xy, IN[5].zwww > 42: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 43: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 44: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 45: MOV TEMP[2].xy, IN[6].xyyy > 46: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 47: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 48: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 49: MOV TEMP[2].xy, IN[6].zwww > 50: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 51: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 52: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 53: MOV TEMP[2].xy, IN[7].xyyy > 54: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 55: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 56: FMA TEMP[0].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 57: MOV TEMP[2].xy, IN[7].zwww > 58: TEX 
TEMP[2], TEMP[2], SAMP[0], 2D > 59: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[2].xyzz > 60: FMA TEMP[2].xyz, TEMP[1].xyzz, CONST[1][7].xxxx, TEMP[0].xyzz > 61: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[1].yyyy > 62: MAX TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx > 63: MAX TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx > 64: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][7].yyyy > 65: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz > 66: CEIL TEMP[2].x, TEMP[2].xxxx > 67: MAX TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 68: MUL TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 69: MUL TEMP[1].x, TEMP[2].xxxx, CONST[1][7].xxxx > 70: MOV TEMP[2].w, TEMP[2].xxxx > 71: FSEQ TEMP[3].xyz, TEMP[1].xxxx, IMM[2].yyyy > 72: SSG TEMP[4].xyz, TEMP[0].xyzz > 73: MUL TEMP[4].xyz, IMM[2].zzzz, TEMP[4].xyzz > 74: RCP TEMP[1].xyz, TEMP[1].xxxx > 75: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 76: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz > 77: MOV OUT[0], TEMP[2] > 78: END >radeonsi: Compiling shader 44 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 
%6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %6, <2 x i32> %8) > %66 = bitcast float %38 to i32 > %67 = bitcast float %39 to i32 > %68 = insertelement <2 x i32> undef, i32 %66, i32 0 > %69 = insertelement <2 x i32> %68, i32 %67, i32 1 > %70 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %69, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = fmul float %74, %71 > %76 = fmul float %74, %72 > %77 = fmul float %74, %73 > %78 = fmul float %75, %25 > %79 = fmul float %76, %25 > %80 = fmul float %77, %25 > %81 = bitcast float %36 to i32 > %82 = bitcast float %37 to i32 > %83 = insertelement <2 x i32> undef, i32 %81, i32 0 > %84 = insertelement <2 x i32> %83, i32 %82, i32 1 > %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = fmul float %89, %86 > %91 = fmul float %89, %87 > %92 = fmul float %89, %88 > %93 = call float @llvm.fma.f32(float %90, float %25, float %78) > %94 = call float @llvm.fma.f32(float %91, float %25, float %79) > %95 = call float @llvm.fma.f32(float %92, float %25, float %80) > %96 = bitcast float %40 to i32 > %97 = bitcast float %41 to i32 > %98 = insertelement <2 x i32> undef, i32 %96, i32 0 > %99 = insertelement <2 x i32> %98, i32 %97, i32 1 > %100 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %99, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %101 = extractelement <4 x float> %100, i32 0 > %102 = extractelement <4 x float> %100, i32 1 > %103 = extractelement <4 x float> %100, i32 2 > %104 = extractelement <4 x float> %100, i32 3 > %105 = fmul float %104, %101 > %106 = fmul float %104, %102 > %107 = fmul float %104, %103 > %108 = call float @llvm.fma.f32(float %105, float %25, float %93) > %109 = call float @llvm.fma.f32(float %106, float %25, float %94) > %110 = call float @llvm.fma.f32(float %107, float %25, float %95) > %111 = bitcast float %42 to i32 > %112 = bitcast float %43 to i32 > %113 = insertelement <2 x i32> undef, i32 %111, i32 0 > %114 = 
insertelement <2 x i32> %113, i32 %112, i32 1 > %115 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %114, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %116 = extractelement <4 x float> %115, i32 0 > %117 = extractelement <4 x float> %115, i32 1 > %118 = extractelement <4 x float> %115, i32 2 > %119 = extractelement <4 x float> %115, i32 3 > %120 = fmul float %119, %116 > %121 = fmul float %119, %117 > %122 = fmul float %119, %118 > %123 = call float @llvm.fma.f32(float %120, float %25, float %108) > %124 = call float @llvm.fma.f32(float %121, float %25, float %109) > %125 = call float @llvm.fma.f32(float %122, float %25, float %110) > %126 = bitcast float %44 to i32 > %127 = bitcast float %45 to i32 > %128 = insertelement <2 x i32> undef, i32 %126, i32 0 > %129 = insertelement <2 x i32> %128, i32 %127, i32 1 > %130 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %129, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %131 = extractelement <4 x float> %130, i32 0 > %132 = extractelement <4 x float> %130, i32 1 > %133 = extractelement <4 x float> %130, i32 2 > %134 = extractelement <4 x float> %130, i32 3 > %135 = fmul float %134, %131 > %136 = fmul float %134, %132 > %137 = fmul float %134, %133 > %138 = fmul float %135, %25 > %139 = fmul float %136, %25 > %140 = fmul float %137, %25 > %141 = call float @llvm.fma.f32(float %138, float 2.000000e+00, float %123) > %142 = call float @llvm.fma.f32(float %139, float 2.000000e+00, float %124) > %143 = call float @llvm.fma.f32(float %140, float 2.000000e+00, float %125) > %144 = bitcast float %46 to i32 > %145 = bitcast float %47 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 0 > %150 = extractelement <4 x float> %148, i32 1 > %151 = extractelement <4 x float> %148, i32 2 > %152 = extractelement <4 x float> %148, i32 3 > %153 = fmul float %152, %149 > %154 = fmul float %152, %150 > %155 = fmul float %152, %151 > %156 = call float @llvm.fma.f32(float %153, float %25, float %141) > %157 = call float @llvm.fma.f32(float %154, float %25, float %142) > %158 = call float @llvm.fma.f32(float %155, float %25, float %143) > %159 = bitcast float %48 to i32 > %160 = bitcast float %49 to i32 > %161 = insertelement <2 x i32> undef, i32 %159, i32 0 > %162 = insertelement <2 x i32> %161, i32 %160, i32 1 > %163 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %162, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %164 = extractelement <4 x float> %163, i32 0 > %165 = extractelement <4 x float> %163, i32 1 > %166 = extractelement <4 x float> %163, i32 2 > %167 = extractelement <4 x float> %163, i32 3 > %168 = fmul float %167, %164 > %169 = fmul float %167, %165 > %170 = fmul float %167, %166 > %171 = call float @llvm.fma.f32(float %168, float %25, float %156) > %172 = call float @llvm.fma.f32(float %169, float %25, float %157) > %173 = call float @llvm.fma.f32(float %170, float %25, float %158) > %174 = bitcast float %50 to i32 > %175 = bitcast float %51 to i32 > %176 = insertelement <2 x i32> undef, i32 %174, i32 0 > %177 = insertelement <2 x i32> %176, i32 %175, i32 1 > %178 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %177, <8 x i32> 
%28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %179 = extractelement <4 x float> %178, i32 0 > %180 = extractelement <4 x float> %178, i32 1 > %181 = extractelement <4 x float> %178, i32 2 > %182 = extractelement <4 x float> %178, i32 3 > %183 = fmul float %182, %179 > %184 = fmul float %182, %180 > %185 = fmul float %182, %181 > %186 = call float @llvm.fma.f32(float %183, float %25, float %171) > %187 = call float @llvm.fma.f32(float %184, float %25, float %172) > %188 = call float @llvm.fma.f32(float %185, float %25, float %173) > %189 = bitcast float %52 to i32 > %190 = bitcast float %53 to i32 > %191 = insertelement <2 x i32> undef, i32 %189, i32 0 > %192 = insertelement <2 x i32> %191, i32 %190, i32 1 > %193 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %192, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %194 = extractelement <4 x float> %193, i32 0 > %195 = extractelement <4 x float> %193, i32 1 > %196 = extractelement <4 x float> %193, i32 2 > %197 = extractelement <4 x float> %193, i32 3 > %198 = fmul float %197, %194 > %199 = fmul float %197, %195 > %200 = fmul float %197, %196 > %201 = call float @llvm.fma.f32(float %198, float %25, float %186) > %202 = call float @llvm.fma.f32(float %199, float %25, float %187) > %203 = call float @llvm.fma.f32(float %200, float %25, float %188) > %204 = bitcast float %54 to i32 > %205 = bitcast float %55 to i32 > %206 = insertelement <2 x i32> undef, i32 %204, i32 0 > %207 = insertelement <2 x i32> %206, i32 %205, i32 1 > %208 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %207, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %209 = extractelement <4 x float> %208, i32 0 > %210 = extractelement <4 x float> %208, i32 1 > %211 = extractelement <4 x float> %208, i32 2 > %212 = extractelement <4 x float> %208, i32 3 > %213 = fmul float %212, %209 > %214 = fmul float %212, %210 > %215 = fmul float %212, %211 > %216 = call float @llvm.fma.f32(float %213, float %25, float %201) > %217 = call float @llvm.fma.f32(float %214, float %25, float %202) > %218 = call float @llvm.fma.f32(float %215, float %25, float %203) > %219 = bitcast float %56 to i32 > %220 = bitcast float %57 to i32 > %221 = insertelement <2 x i32> undef, i32 %219, i32 0 > %222 = insertelement <2 x i32> %221, i32 %220, i32 1 > %223 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %222, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %224 = extractelement <4 x float> %223, i32 0 > %225 = extractelement <4 x float> %223, i32 1 > %226 = extractelement <4 x float> %223, i32 2 > %227 = extractelement <4 x float> %223, i32 3 > %228 = fmul float %227, %224 > %229 = fmul float %227, %225 > %230 = fmul float %227, %226 > %231 = call float @llvm.fma.f32(float %228, float %25, float %216) > %232 = call float @llvm.fma.f32(float %229, float %25, float %217) > %233 = call float @llvm.fma.f32(float %230, float %25, float %218) > %234 = bitcast float %58 to i32 > %235 = bitcast float %59 to i32 > %236 = insertelement <2 x i32> undef, i32 %234, i32 0 > %237 = insertelement <2 x i32> %236, i32 %235, i32 1 > %238 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %237, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %239 = extractelement <4 x float> %238, i32 0 > %240 = extractelement <4 x float> %238, i32 1 > %241 = extractelement <4 x float> %238, i32 2 > %242 = 
extractelement <4 x float> %238, i32 3 > %243 = fmul float %242, %239 > %244 = fmul float %242, %240 > %245 = fmul float %242, %241 > %246 = call float @llvm.fma.f32(float %243, float %25, float %231) > %247 = call float @llvm.fma.f32(float %244, float %25, float %232) > %248 = call float @llvm.fma.f32(float %245, float %25, float %233) > %249 = bitcast float %60 to i32 > %250 = bitcast float %61 to i32 > %251 = insertelement <2 x i32> undef, i32 %249, i32 0 > %252 = insertelement <2 x i32> %251, i32 %250, i32 1 > %253 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %252, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %254 = extractelement <4 x float> %253, i32 0 > %255 = extractelement <4 x float> %253, i32 1 > %256 = extractelement <4 x float> %253, i32 2 > %257 = extractelement <4 x float> %253, i32 3 > %258 = fmul float %257, %254 > %259 = fmul float %257, %255 > %260 = fmul float %257, %256 > %261 = call float @llvm.fma.f32(float %258, float %25, float %246) > %262 = call float @llvm.fma.f32(float %259, float %25, float %247) > %263 = call float @llvm.fma.f32(float %260, float %25, float %248) > %264 = bitcast float %62 to i32 > %265 = bitcast float %63 to i32 > %266 = insertelement <2 x i32> undef, i32 %264, i32 0 > %267 = insertelement <2 x i32> %266, i32 %265, i32 1 > %268 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %267, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %269 = extractelement <4 x float> %268, i32 0 > %270 = extractelement <4 x float> %268, i32 1 > %271 = extractelement <4 x float> %268, i32 2 > %272 = extractelement <4 x float> %268, i32 3 > %273 = fmul float %272, %269 > %274 = fmul float %272, %270 > %275 = fmul float %272, %271 > %276 = call float @llvm.fma.f32(float %273, float %25, float %261) > %277 = call float @llvm.fma.f32(float %274, float %25, float %262) > %278 = call float @llvm.fma.f32(float %275, float %25, float %263) > %279 = bitcast float %64 to i32 > %280 = bitcast float %65 to i32 > %281 = insertelement <2 x i32> undef, i32 %279, i32 0 > %282 = insertelement <2 x i32> %281, i32 %280, i32 1 > %283 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %282, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %284 = extractelement <4 x float> %283, i32 0 > %285 = extractelement <4 x float> %283, i32 1 > %286 = extractelement <4 x float> %283, i32 2 > %287 = extractelement <4 x float> %283, i32 3 > %288 = fmul float %287, %284 > %289 = fmul float %287, %285 > %290 = fmul float %287, %286 > %291 = call float @llvm.fma.f32(float %288, float %25, float %276) > %292 = call float @llvm.fma.f32(float %289, float %25, float %277) > %293 = call float @llvm.fma.f32(float %290, float %25, float %278) > %294 = fmul float %291, 6.250000e-02 > %295 = fmul float %292, 6.250000e-02 > %296 = fmul float %293, 6.250000e-02 > %297 = call float @llvm.maxnum.f32(float %295, float %294) > %298 = call float @llvm.maxnum.f32(float %296, float %297) > %299 = fmul float %298, %26 > %300 = fmul float %299, 2.550000e+02 > %301 = call float @llvm.ceil.f32(float %300) > %302 = call float @llvm.maxnum.f32(float %301, float 1.000000e+00) > %303 = fmul float %302, 0x3F70101020000000 > %304 = fmul float %303, %25 > %305 = fcmp oeq float %304, 0.000000e+00 > %306 = fcmp oeq float %304, 0.000000e+00 > %307 = fcmp oeq float %304, 0.000000e+00 > %308 = fcmp ogt float %294, 0.000000e+00 > %309 = select i1 %308, float 1.000000e+00, float %294 > 
%310 = fcmp oge float %309, 0.000000e+00 > %311 = fcmp ogt float %295, 0.000000e+00 > %312 = select i1 %311, float 1.000000e+00, float %295 > %313 = fcmp oge float %312, 0.000000e+00 > %314 = fcmp ogt float %296, 0.000000e+00 > %315 = select i1 %314, float 1.000000e+00, float %296 > %316 = fcmp oge float %315, 0.000000e+00 > %.op = fmul float %309, 0x4600000000000000 > %317 = select i1 %310, float %.op, float 0xC600000000000000 > %.op20 = fmul float %312, 0x4600000000000000 > %318 = select i1 %313, float %.op20, float 0xC600000000000000 > %.op21 = fmul float %315, 0x4600000000000000 > %319 = select i1 %316, float %.op21, float 0xC600000000000000 > %320 = fdiv float 1.000000e+00, %304 > %321 = fmul float %294, %320 > %322 = fmul float %295, %320 > %323 = fmul float %296, %320 > %324 = select i1 %305, float %317, float %321 > %325 = select i1 %306, float %318, float %322 > %326 = select i1 %307, float %319, float %323 > %327 = bitcast float %5 to i32 > %328 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %327, 10 > %329 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %328, float %324, 11 > %330 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %329, float %325, 12 > %331 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %330, float %326, 13 > %332 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %331, float %303, 14 > %333 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %332, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %333 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..21] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.0000} >IMM[1] FLT32 { 255.0020, 0.0000, 
0.0000, 0.0000} >IMM[2] INT32 {1, 2, 4, 0} >IMM[3] UINT32 {1, 16, 0, 304} >IMM[4] UINT32 {320, 336, 0, 0} > 0: FMA TEMP[0].xy, IN[1].xyyy, IMM[0].xyyy, IMM[0].zxxx > 1: FMA TEMP[1].xy, TEMP[0].xyyy, IMM[0].wwww, IMM[0].yyyy > 2: MOV TEMP[1].zw, IMM[0].xxxx > 3: MUL TEMP[0].xyz, IN[4].zyxx, IMM[1].xxxx > 4: F2I TEMP[2].xyz, TEMP[0].xyzz > 5: SHL TEMP[3].xyz, TEMP[2].xyzz, IMM[2].xxxx > 6: UMAD TEMP[2].xyz, TEMP[2].xyzz, IMM[2].yyyy, IMM[2].xxxx > 7: UMUL TEMP[4].x, TEMP[3].xxxx, IMM[3].yyyy > 8: USHR TEMP[5].x, TEMP[4].xxxx, IMM[2].zzzz > 9: UARL ADDR[0].x, TEMP[5].xxxx > 10: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 11: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 12: MOV TEMP[4].w, TEMP[4].xxxx > 13: UMUL TEMP[5].x, TEMP[3].yyyy, IMM[3].yyyy > 14: USHR TEMP[6].x, TEMP[5].xxxx, IMM[2].zzzz > 15: UARL ADDR[0].x, TEMP[6].xxxx > 16: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 17: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 18: MOV TEMP[5].w, TEMP[5].xxxx > 19: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[3].yyyy > 20: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz > 21: UARL ADDR[0].x, TEMP[7].xxxx > 22: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 23: UMUL TEMP[7].x, TEMP[2].xxxx, IMM[3].yyyy > 24: USHR TEMP[8].x, TEMP[7].xxxx, IMM[2].zzzz > 25: UARL ADDR[0].x, TEMP[8].xxxx > 26: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 27: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 28: UMUL TEMP[7].x, TEMP[2].xxxx, IMM[3].yyyy > 29: USHR TEMP[8].x, TEMP[7].xxxx, IMM[2].zzzz > 30: UARL ADDR[0].x, TEMP[8].xxxx > 31: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 32: UMUL TEMP[8].x, TEMP[2].xxxx, IMM[3].yyyy > 33: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 34: UARL ADDR[0].x, TEMP[9].xxxx > 35: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 36: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 37: UMUL TEMP[8].x, TEMP[2].xxxx, IMM[3].yyyy > 38: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 39: UARL ADDR[0].x, TEMP[9].xxxx > 40: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 41: UMUL TEMP[9].x, TEMP[2].xxxx, IMM[3].yyyy > 42: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 43: UARL ADDR[0].x, TEMP[10].xxxx > 44: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 45: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 46: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 47: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 48: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww > 49: MOV TEMP[4].z, TEMP[7].xxxx > 50: UMUL TEMP[7].x, TEMP[2].yyyy, IMM[3].yyyy > 51: USHR TEMP[8].x, TEMP[7].xxxx, IMM[2].zzzz > 52: UARL ADDR[0].x, TEMP[8].xxxx > 53: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 54: UMUL TEMP[8].x, TEMP[2].yyyy, IMM[3].yyyy > 55: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 56: UARL ADDR[0].x, TEMP[9].xxxx > 57: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 58: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 59: UMUL TEMP[8].x, TEMP[2].yyyy, IMM[3].yyyy > 60: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 61: UARL ADDR[0].x, TEMP[9].xxxx > 62: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 63: UMUL TEMP[9].x, TEMP[2].yyyy, IMM[3].yyyy > 64: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 65: UARL ADDR[0].x, TEMP[10].xxxx > 66: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 67: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 68: UMUL TEMP[9].x, TEMP[2].yyyy, IMM[3].yyyy > 69: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 70: UARL ADDR[0].x, TEMP[10].xxxx > 71: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 72: UMUL TEMP[10].x, TEMP[2].yyyy, IMM[3].yyyy > 73: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 74: UARL ADDR[0].x, TEMP[11].xxxx > 75: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 76: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 77: MUL 
TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 78: MUL TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx > 79: MOV TEMP[7].y, TEMP[7].xxxx > 80: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 81: MUL TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx > 82: MOV TEMP[5].z, TEMP[8].xxxx > 83: UMUL TEMP[8].x, TEMP[2].xxxx, IMM[3].yyyy > 84: USHR TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz > 85: UARL ADDR[0].x, TEMP[9].xxxx > 86: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 87: UMUL TEMP[9].x, TEMP[2].xxxx, IMM[3].yyyy > 88: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 89: UARL ADDR[0].x, TEMP[10].xxxx > 90: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 91: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 92: UMUL TEMP[9].x, TEMP[2].xxxx, IMM[3].yyyy > 93: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz > 94: UARL ADDR[0].x, TEMP[10].xxxx > 95: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 96: UMUL TEMP[10].x, TEMP[2].xxxx, IMM[3].yyyy > 97: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz > 98: UARL ADDR[0].x, TEMP[11].xxxx > 99: MOV TEMP[10].y, CONST[2][ADDR[0].x] >100: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx >101: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx >102: MUL TEMP[4].x, IMM[0].wwww, TEMP[9].xxxx >103: UMUL TEMP[9].x, TEMP[2].xxxx, IMM[3].yyyy >104: USHR TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz >105: UARL ADDR[0].x, TEMP[10].xxxx >106: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >107: UMUL TEMP[10].x, TEMP[2].xxxx, IMM[3].yyyy >108: USHR TEMP[11].x, TEMP[10].xxxx, IMM[2].zzzz >109: UARL ADDR[0].x, TEMP[11].xxxx >110: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >111: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >112: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >113: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].wwww, IMM[0].xxxx >114: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >115: MOV TEMP[4].y, TEMP[11].xxxx >116: UMUL TEMP[11].x, TEMP[2].yyyy, IMM[3].yyyy >117: USHR TEMP[12].x, TEMP[11].xxxx, IMM[2].zzzz >118: UARL ADDR[0].x, TEMP[12].xxxx >119: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >120: UMUL TEMP[12].x, TEMP[2].yyyy, IMM[3].yyyy >121: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz >122: UARL ADDR[0].x, TEMP[13].xxxx >123: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >124: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >125: UMUL TEMP[12].x, TEMP[2].yyyy, IMM[3].yyyy >126: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz >127: UARL ADDR[0].x, TEMP[13].xxxx >128: MOV TEMP[12].x, CONST[2][ADDR[0].x] >129: UMUL TEMP[13].x, TEMP[2].yyyy, IMM[3].yyyy >130: USHR TEMP[14].x, TEMP[13].xxxx, IMM[2].zzzz >131: UARL ADDR[0].x, TEMP[14].xxxx >132: MOV TEMP[13].y, CONST[2][ADDR[0].x] >133: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >134: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >135: MUL TEMP[5].x, IMM[0].wwww, TEMP[12].xxxx >136: UMUL TEMP[12].x, TEMP[2].yyyy, IMM[3].yyyy >137: USHR TEMP[13].x, TEMP[12].xxxx, IMM[2].zzzz >138: UARL ADDR[0].x, TEMP[13].xxxx >139: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >140: UMUL TEMP[13].x, TEMP[2].yyyy, IMM[3].yyyy >141: USHR TEMP[14].x, TEMP[13].xxxx, IMM[2].zzzz >142: UARL ADDR[0].x, TEMP[14].xxxx >143: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >144: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >145: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >146: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].wwww, IMM[0].xxxx >147: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >148: MOV TEMP[5].y, TEMP[14].xxxx >149: ADD TEMP[4], TEMP[4], TEMP[5] >150: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[3].yyyy >151: USHR TEMP[15].x, TEMP[14].xxxx, IMM[2].zzzz >152: UARL ADDR[0].x, TEMP[15].xxxx >153: MOV TEMP[14].y, CONST[2][ADDR[0].x] >154: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy 
>155: MOV TEMP[5].w, TEMP[14].xxxx >156: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[3].yyyy >157: USHR TEMP[15].x, TEMP[14].xxxx, IMM[2].zzzz >158: UARL ADDR[0].x, TEMP[15].xxxx >159: MOV TEMP[14].x, CONST[2][ADDR[0].x] >160: UMUL TEMP[15].x, TEMP[2].zzzz, IMM[3].yyyy >161: USHR TEMP[16].x, TEMP[15].xxxx, IMM[2].zzzz >162: UARL ADDR[0].x, TEMP[16].xxxx >163: MOV TEMP[15].w, CONST[2][ADDR[0].x] >164: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >165: UMUL TEMP[15].x, TEMP[2].zzzz, IMM[3].yyyy >166: USHR TEMP[16].x, TEMP[15].xxxx, IMM[2].zzzz >167: UARL ADDR[0].x, TEMP[16].xxxx >168: MOV TEMP[15].y, CONST[2][ADDR[0].x] >169: UMUL TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy >170: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >171: UARL ADDR[0].x, TEMP[17].xxxx >172: MOV TEMP[16].z, CONST[2][ADDR[0].x] >173: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >174: UMUL TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy >175: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >176: UARL ADDR[0].x, TEMP[17].xxxx >177: MOV TEMP[16].y, CONST[2][ADDR[0].x] >178: UMUL TEMP[17].x, TEMP[2].zzzz, IMM[3].yyyy >179: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >180: UARL ADDR[0].x, TEMP[18].xxxx >181: MOV TEMP[17].z, CONST[2][ADDR[0].x] >182: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >184: MUL TEMP[14].x, IMM[0].wwww, TEMP[14].xxxx >185: MOV TEMP[14].y, TEMP[14].xxxx >186: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >187: MUL TEMP[15].x, IMM[0].wwww, TEMP[15].xxxx >188: MOV TEMP[5].z, TEMP[15].xxxx >189: UMUL TEMP[15].x, TEMP[2].zzzz, IMM[3].yyyy >190: USHR TEMP[16].x, TEMP[15].xxxx, IMM[2].zzzz >191: UARL ADDR[0].x, TEMP[16].xxxx >192: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >193: UMUL TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy >194: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >195: UARL ADDR[0].x, TEMP[17].xxxx >196: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >197: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >198: UMUL TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy >199: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >200: UARL ADDR[0].x, TEMP[17].xxxx >201: MOV TEMP[16].x, CONST[2][ADDR[0].x] >202: UMUL TEMP[17].x, TEMP[2].zzzz, IMM[3].yyyy >203: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >204: UARL ADDR[0].x, TEMP[18].xxxx >205: MOV TEMP[17].y, CONST[2][ADDR[0].x] >206: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >207: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >208: MUL TEMP[5].x, IMM[0].wwww, TEMP[16].xxxx >209: UMUL TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy >210: USHR TEMP[17].x, TEMP[16].xxxx, IMM[2].zzzz >211: UARL ADDR[0].x, TEMP[17].xxxx >212: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >213: UMUL TEMP[17].x, TEMP[2].zzzz, IMM[3].yyyy >214: USHR TEMP[18].x, TEMP[17].xxxx, IMM[2].zzzz >215: UARL ADDR[0].x, TEMP[18].xxxx >216: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >217: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >218: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >219: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].wwww, IMM[0].xxxx >220: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >221: MOV TEMP[5].y, TEMP[18].xxxx >222: ADD TEMP[4], TEMP[4], TEMP[5] >223: MOV TEMP[5].xyz, IN[0].xyzx >224: MOV TEMP[5].w, IMM[0].xxxx >225: DP4 TEMP[18].x, TEMP[4], TEMP[5] >226: MOV TEMP[18].y, TEMP[18].xxxx >227: DP3 TEMP[19].x, TEMP[4].xyzz, IN[2].xyzz >228: MOV TEMP[4].y, TEMP[19].xxxx >229: UMUL TEMP[19].x, TEMP[2].xxxx, IMM[3].yyyy >230: USHR TEMP[20].x, TEMP[19].xxxx, IMM[2].zzzz >231: UARL ADDR[0].x, TEMP[20].xxxx >232: MOV TEMP[19].x, CONST[2][ADDR[0].x] >233: UMUL TEMP[20].x, 
TEMP[2].xxxx, IMM[3].yyyy >234: USHR TEMP[21].x, TEMP[20].xxxx, IMM[2].zzzz >235: UARL ADDR[0].x, TEMP[21].xxxx >236: MOV TEMP[20].z, CONST[2][ADDR[0].x] >237: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >238: MUL TEMP[19].x, TEMP[19].xxxx, IN[3].xxxx >239: MUL TEMP[19].x, IMM[0].wwww, TEMP[19].xxxx >240: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww >241: MOV TEMP[19].y, TEMP[6].xxxx >242: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[3].yyyy >243: USHR TEMP[20].x, TEMP[6].xxxx, IMM[2].zzzz >244: UARL ADDR[0].x, TEMP[20].xxxx >245: MOV TEMP[6].x, CONST[2][ADDR[0].x] >246: UMUL TEMP[20].x, TEMP[2].yyyy, IMM[3].yyyy >247: USHR TEMP[21].x, TEMP[20].xxxx, IMM[2].zzzz >248: UARL ADDR[0].x, TEMP[21].xxxx >249: MOV TEMP[20].z, CONST[2][ADDR[0].x] >250: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >251: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >252: MUL TEMP[7].x, IMM[0].wwww, TEMP[6].xxxx >253: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >254: MOV TEMP[19].z, TEMP[6].xxxx >255: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >256: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >257: MOV TEMP[7].z, TEMP[6].xxxx >258: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >259: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[3].yyyy >260: USHR TEMP[10].x, TEMP[6].xxxx, IMM[2].zzzz >261: UARL ADDR[0].x, TEMP[10].xxxx >262: MOV TEMP[6].z, CONST[2][ADDR[0].x] >263: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >264: MOV TEMP[19].w, TEMP[6].xxxx >265: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[3].yyyy >266: USHR TEMP[10].x, TEMP[6].xxxx, IMM[2].zzzz >267: UARL ADDR[0].x, TEMP[10].xxxx >268: MOV TEMP[6].z, CONST[2][ADDR[0].x] >269: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >270: MOV TEMP[7].w, TEMP[6].xxxx >271: ADD TEMP[7], TEMP[7], TEMP[19] >272: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[3].yyyy >273: USHR TEMP[10].x, TEMP[6].xxxx, IMM[2].zzzz >274: UARL ADDR[0].x, TEMP[10].xxxx >275: MOV TEMP[6].x, CONST[2][ADDR[0].x] >276: UMUL TEMP[10].x, TEMP[2].zzzz, IMM[3].yyyy >277: USHR TEMP[13].x, TEMP[10].xxxx, IMM[2].zzzz >278: UARL ADDR[0].x, TEMP[13].xxxx >279: MOV TEMP[10].z, CONST[2][ADDR[0].x] >280: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >281: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >282: MUL TEMP[14].x, IMM[0].wwww, TEMP[6].xxxx >283: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >284: MOV TEMP[14].z, TEMP[6].xxxx >285: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >286: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[3].yyyy >287: USHR TEMP[10].x, TEMP[6].xxxx, IMM[2].zzzz >288: UARL ADDR[0].x, TEMP[10].xxxx >289: MOV TEMP[6].z, CONST[2][ADDR[0].x] >290: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >291: MOV TEMP[14].w, TEMP[6].xxxx >292: ADD TEMP[7], TEMP[7], TEMP[14] >293: DP4 TEMP[6].x, TEMP[7], TEMP[5] >294: MOV TEMP[18].z, TEMP[6].xxxx >295: DP3 TEMP[6].x, TEMP[7].xyzz, IN[2].xyzz >296: MOV TEMP[4].z, TEMP[6].xxxx >297: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[3].yyyy >298: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz >299: UARL ADDR[0].x, TEMP[7].xxxx >300: MOV TEMP[6].x, CONST[2][ADDR[0].x] >301: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >302: MOV TEMP[9].w, TEMP[6].xxxx >303: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[3].yyyy >304: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz >305: UARL ADDR[0].x, TEMP[7].xxxx >306: MOV TEMP[6].x, CONST[2][ADDR[0].x] >307: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >308: MOV TEMP[12].w, TEMP[6].xxxx >309: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[3].yyyy >310: USHR TEMP[6].x, TEMP[3].xxxx, IMM[2].zzzz >311: UARL ADDR[0].x, TEMP[6].xxxx >312: MOV TEMP[3].x, CONST[2][ADDR[0].x] >313: MUL TEMP[3].x, IN[3].zzzz, TEMP[3].xxxx >314: MOV TEMP[16].w, TEMP[3].xxxx 
>315: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[3].yyyy >316: USHR TEMP[6].x, TEMP[3].xxxx, IMM[2].zzzz >317: UARL ADDR[0].x, TEMP[6].xxxx >318: MOV TEMP[3].x, CONST[2][ADDR[0].x] >319: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[3].yyyy >320: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz >321: UARL ADDR[0].x, TEMP[7].xxxx >322: MOV TEMP[6].y, CONST[2][ADDR[0].x] >323: FMA TEMP[0].x, TEMP[3].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >324: ADD TEMP[3].x, TEMP[8].zzzz, TEMP[8].yyyy >325: MOV TEMP[0].w, TEMP[3].xxxx >326: MUL TEMP[3].xy, TEMP[0].xwww, IN[3].xxxx >327: MUL TEMP[3].xy, IMM[0].wwww, TEMP[3].xyyy >328: MOV TEMP[9].yz, TEMP[3].yxyy >329: UMUL TEMP[3].x, TEMP[2].yyyy, IMM[3].yyyy >330: USHR TEMP[6].x, TEMP[3].xxxx, IMM[2].zzzz >331: UARL ADDR[0].x, TEMP[6].xxxx >332: MOV TEMP[3].x, CONST[2][ADDR[0].x] >333: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[3].yyyy >334: USHR TEMP[7].x, TEMP[6].xxxx, IMM[2].zzzz >335: UARL ADDR[0].x, TEMP[7].xxxx >336: MOV TEMP[6].y, CONST[2][ADDR[0].x] >337: FMA TEMP[0].x, TEMP[3].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >338: UMUL TEMP[3].x, TEMP[2].zzzz, IMM[3].yyyy >339: USHR TEMP[6].x, TEMP[3].xxxx, IMM[2].zzzz >340: UARL ADDR[0].x, TEMP[6].xxxx >341: MOV TEMP[3].x, CONST[2][ADDR[0].x] >342: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[3].yyyy >343: USHR TEMP[6].x, TEMP[2].xxxx, IMM[2].zzzz >344: UARL ADDR[0].x, TEMP[6].xxxx >345: MOV TEMP[2].y, CONST[2][ADDR[0].x] >346: FMA TEMP[2].x, TEMP[3].xxxx, TEMP[2].yyyy, -TEMP[15].xxxx >347: MOV TEMP[0].y, TEMP[2].xxxx >348: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >349: MOV TEMP[0].z, TEMP[2].xxxx >350: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >351: MUL TEMP[2].xy, IMM[0].wwww, TEMP[0].yzzz >352: MOV TEMP[16].yz, TEMP[2].yxyy >353: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >354: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >355: MOV TEMP[0].y, TEMP[2].xxxx >356: MUL TEMP[2].xy, IMM[0].wwww, TEMP[0].xyyy >357: MOV TEMP[12].yz, TEMP[2].yxyy >358: ADD TEMP[0], TEMP[9], TEMP[12] >359: ADD TEMP[0], TEMP[16], TEMP[0] >360: DP4 TEMP[18].x, TEMP[0], TEMP[5] >361: DP3 TEMP[4].x, TEMP[0].xyzz, IN[2].xyzz >362: MOV TEMP[18].w, IMM[0].xxxx >363: DP4 TEMP[0].x, CONST[1][19], TEMP[18] >364: DP4 TEMP[2].x, CONST[1][20], TEMP[18] >365: MOV TEMP[0].y, TEMP[2].xxxx >366: DP4 TEMP[2].x, CONST[1][21], TEMP[18] >367: MOV TEMP[0].z, TEMP[2].xxxx >368: DP3 TEMP[2].x, CONST[1][19].xyzz, TEMP[4].xyzz >369: DP3 TEMP[3].x, CONST[1][20].xyzz, TEMP[4].xyzz >370: MOV TEMP[2].y, TEMP[3].xxxx >371: DP3 TEMP[3].x, CONST[1][21].xyzz, TEMP[4].xyzz >372: MOV TEMP[2].z, TEMP[3].xxxx >373: MOV TEMP[3].xy, IN[1].xyxx >374: MOV OUT[3], TEMP[3] >375: MOV OUT[2], TEMP[2] >376: MOV OUT[1], TEMP[0] >377: MOV OUT[0], TEMP[1] >378: END >radeonsi: Compiling shader 45 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 304) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 312) > %23 = call float 
@llvm.SI.load.const(<16 x i8> %19, i32 316) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) > %37 = extractelement <4 x float> %36, i32 0 > %38 = extractelement <4 x float> %36, i32 1 > %39 = extractelement <4 x float> %36, i32 2 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %14) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %15) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %16) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 > %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %17) > %60 = extractelement <4 x float> %59, i32 0 > %61 = extractelement <4 x float> %59, i32 1 > %62 = extractelement <4 x float> %59, i32 2 > %63 = call float @llvm.fma.f32(float %43, float 1.000000e+00, float 0.000000e+00) > %64 = call float @llvm.fma.f32(float %44, float -1.000000e+00, float 1.000000e+00) > %65 = call float @llvm.fma.f32(float %63, float 2.000000e+00, float -1.000000e+00) > %66 = call float @llvm.fma.f32(float %64, float 2.000000e+00, float -1.000000e+00) > %67 = fmul float %62, 0x406FE01000000000 > %68 = fmul float %61, 0x406FE01000000000 > %69 = fmul float %60, 0x406FE01000000000 > %70 = fptosi float %67 to i32 > %71 = fptosi float %68 to i32 > %72 = fptosi float %69 to i32 > %73 = shl i32 %72, 1 > %74 = bitcast i32 %73 to float > %75 = shl i32 %70, 1 > %76 = or i32 %75, 1 > %77 = shl i32 %71, 1 > %78 = or i32 %77, 1 > %79 = shl i32 %72, 1 > %80 = or i32 %79, 1 > %81 = shl i32 %70, 5 > %82 = or i32 %81, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %82) > %84 = fmul float %54, %83 > %85 = shl i32 %71, 5 > %86 = or i32 %85, 4 > %87 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %86) > %88 = 
fmul float %55, %87 > %89 = shl i32 %76, 4 > %90 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %89) > %91 = shl i32 %76, 4 > %92 = or i32 %91, 12 > %93 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %92) > %94 = fmul float %90, %93 > %95 = shl i32 %76, 4 > %96 = or i32 %95, 4 > %97 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %96) > %98 = shl i32 %76, 4 > %99 = or i32 %98, 8 > %100 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %99) > %101 = fsub float -0.000000e+00, %94 > %102 = call float @llvm.fma.f32(float %97, float %100, float %101) > %103 = shl i32 %76, 4 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %104) > %106 = shl i32 %76, 4 > %107 = or i32 %106, 8 > %108 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %107) > %109 = call float @llvm.fma.f32(float %105, float %108, float %94) > %110 = fmul float %109, %54 > %111 = fmul float %102, %54 > %112 = fmul float %111, 2.000000e+00 > %113 = shl i32 %78, 4 > %114 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %113) > %115 = shl i32 %78, 4 > %116 = or i32 %115, 12 > %117 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %116) > %118 = fmul float %114, %117 > %119 = shl i32 %78, 4 > %120 = or i32 %119, 4 > %121 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %120) > %122 = shl i32 %78, 4 > %123 = or i32 %122, 8 > %124 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %123) > %125 = fsub float -0.000000e+00, %118 > %126 = call float @llvm.fma.f32(float %121, float %124, float %125) > %127 = shl i32 %78, 4 > %128 = or i32 %127, 4 > %129 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %128) > %130 = shl i32 %78, 4 > %131 = or i32 %130, 8 > %132 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %131) > %133 = call float @llvm.fma.f32(float %129, float %132, float %118) > %134 = fmul float %133, %55 > %135 = fmul float %134, 2.000000e+00 > %136 = fmul float %126, %55 > %137 = fmul float %136, 2.000000e+00 > %138 = shl i32 %76, 4 > %139 = or i32 %138, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %139) > %141 = shl i32 %76, 4 > %142 = or i32 %141, 8 > %143 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %142) > %144 = shl i32 %76, 4 > %145 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %144) > %146 = shl i32 %76, 4 > %147 = or i32 %146, 12 > %148 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %147) > %149 = fmul float %143, %148 > %150 = fmul float %143, %145 > %151 = fmul float %140, %148 > %152 = shl i32 %76, 4 > %153 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %152) > %154 = shl i32 %76, 4 > %155 = or i32 %154, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %155) > %157 = call float @llvm.fma.f32(float %153, float %156, float %149) > %158 = fmul float %157, %54 > %159 = fmul float %158, 2.000000e+00 > %160 = shl i32 %76, 4 > %161 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %160) > %162 = shl i32 %76, 4 > %163 = or i32 %162, 4 > %164 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %163) > %165 = shl i32 %76, 4 > %166 = or i32 %165, 8 > %167 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %166) > %168 = shl i32 %76, 4 > %169 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %168) > %170 = shl i32 %76, 4 > %171 = or i32 %170, 4 > %172 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %171) > %173 = shl i32 %76, 4 > %174 = or i32 %173, 8 > %175 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %174) > %176 = fmul float %161, %169 > %177 = fmul float %164, %172 > %178 = fmul float %167, %175 > 
%179 = fadd float %178, %177 > %180 = fadd float %178, %176 > %181 = fadd float %177, %176 > %182 = fsub float -0.000000e+00, %179 > %183 = call float @llvm.fma.f32(float %182, float 2.000000e+00, float 1.000000e+00) > %184 = fsub float -0.000000e+00, %180 > %185 = call float @llvm.fma.f32(float %184, float 2.000000e+00, float 1.000000e+00) > %186 = fsub float -0.000000e+00, %181 > %187 = call float @llvm.fma.f32(float %186, float 2.000000e+00, float 1.000000e+00) > %188 = fmul float %54, %185 > %189 = shl i32 %78, 4 > %190 = or i32 %189, 4 > %191 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %190) > %192 = shl i32 %78, 4 > %193 = or i32 %192, 8 > %194 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %193) > %195 = shl i32 %78, 4 > %196 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %195) > %197 = shl i32 %78, 4 > %198 = or i32 %197, 12 > %199 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %198) > %200 = fmul float %194, %199 > %201 = fmul float %194, %196 > %202 = fmul float %191, %199 > %203 = shl i32 %78, 4 > %204 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %203) > %205 = shl i32 %78, 4 > %206 = or i32 %205, 4 > %207 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %206) > %208 = call float @llvm.fma.f32(float %204, float %207, float %200) > %209 = fmul float %208, %55 > %210 = fmul float %209, 2.000000e+00 > %211 = shl i32 %78, 4 > %212 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %211) > %213 = shl i32 %78, 4 > %214 = or i32 %213, 4 > %215 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %214) > %216 = shl i32 %78, 4 > %217 = or i32 %216, 8 > %218 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %217) > %219 = shl i32 %78, 4 > %220 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %219) > %221 = shl i32 %78, 4 > %222 = or i32 %221, 4 > %223 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %222) > %224 = shl i32 %78, 4 > %225 = or i32 %224, 8 > %226 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %225) > %227 = fmul float %212, %220 > %228 = fmul float %215, %223 > %229 = fmul float %218, %226 > %230 = fadd float %229, %228 > %231 = fadd float %229, %227 > %232 = fadd float %228, %227 > %233 = fsub float -0.000000e+00, %230 > %234 = call float @llvm.fma.f32(float %233, float 2.000000e+00, float 1.000000e+00) > %235 = fsub float -0.000000e+00, %231 > %236 = call float @llvm.fma.f32(float %235, float 2.000000e+00, float 1.000000e+00) > %237 = fsub float -0.000000e+00, %232 > %238 = call float @llvm.fma.f32(float %237, float 2.000000e+00, float 1.000000e+00) > %239 = fmul float %55, %236 > %240 = fadd float %159, %210 > %241 = fadd float %188, %239 > %242 = fadd float %112, %137 > %243 = fadd float %84, %88 > %244 = shl i32 %72, 5 > %245 = or i32 %244, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %245) > %247 = fmul float %56, %246 > %248 = shl i32 %80, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %248) > %250 = shl i32 %80, 4 > %251 = or i32 %250, 12 > %252 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %251) > %253 = fmul float %249, %252 > %254 = shl i32 %80, 4 > %255 = or i32 %254, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %255) > %257 = shl i32 %80, 4 > %258 = or i32 %257, 8 > %259 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %258) > %260 = fsub float -0.000000e+00, %253 > %261 = call float @llvm.fma.f32(float %256, float %259, float %260) > %262 = shl i32 %80, 4 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %263) > %265 = shl i32 
%80, 4 > %266 = or i32 %265, 8 > %267 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %266) > %268 = call float @llvm.fma.f32(float %264, float %267, float %253) > %269 = fmul float %268, %56 > %270 = fmul float %269, 2.000000e+00 > %271 = fmul float %261, %56 > %272 = fmul float %271, 2.000000e+00 > %273 = shl i32 %80, 4 > %274 = or i32 %273, 4 > %275 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %274) > %276 = shl i32 %80, 4 > %277 = or i32 %276, 8 > %278 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %277) > %279 = shl i32 %80, 4 > %280 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %279) > %281 = shl i32 %80, 4 > %282 = or i32 %281, 12 > %283 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %282) > %284 = fmul float %278, %283 > %285 = fmul float %278, %280 > %286 = fmul float %275, %283 > %287 = shl i32 %80, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %287) > %289 = shl i32 %80, 4 > %290 = or i32 %289, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %290) > %292 = call float @llvm.fma.f32(float %288, float %291, float %284) > %293 = fmul float %292, %56 > %294 = fmul float %293, 2.000000e+00 > %295 = shl i32 %80, 4 > %296 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %295) > %297 = shl i32 %80, 4 > %298 = or i32 %297, 4 > %299 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %298) > %300 = shl i32 %80, 4 > %301 = or i32 %300, 8 > %302 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %301) > %303 = shl i32 %80, 4 > %304 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %303) > %305 = shl i32 %80, 4 > %306 = or i32 %305, 4 > %307 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %306) > %308 = shl i32 %80, 4 > %309 = or i32 %308, 8 > %310 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %309) > %311 = fmul float %296, %304 > %312 = fmul float %299, %307 > %313 = fmul float %302, %310 > %314 = fadd float %313, %312 > %315 = fadd float %313, %311 > %316 = fadd float %312, %311 > %317 = fsub float -0.000000e+00, %314 > %318 = call float @llvm.fma.f32(float %317, float 2.000000e+00, float 1.000000e+00) > %319 = fsub float -0.000000e+00, %315 > %320 = call float @llvm.fma.f32(float %319, float 2.000000e+00, float 1.000000e+00) > %321 = fsub float -0.000000e+00, %316 > %322 = call float @llvm.fma.f32(float %321, float 2.000000e+00, float 1.000000e+00) > %323 = fmul float %56, %320 > %324 = fadd float %240, %294 > %325 = fadd float %241, %323 > %326 = fadd float %242, %272 > %327 = fadd float %243, %247 > %328 = fmul float %324, %37 > %329 = fmul float %325, %38 > %330 = fadd float %328, %329 > %331 = fmul float %326, %39 > %332 = fadd float %330, %331 > %333 = fadd float %332, %327 > %334 = fmul float %324, %48 > %335 = fmul float %325, %49 > %336 = fadd float %335, %334 > %337 = fmul float %326, %50 > %338 = fadd float %336, %337 > %339 = shl i32 %76, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %339) > %341 = shl i32 %76, 4 > %342 = or i32 %341, 8 > %343 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %342) > %344 = fsub float -0.000000e+00, %151 > %345 = call float @llvm.fma.f32(float %340, float %343, float %344) > %346 = fmul float %345, %54 > %347 = fmul float %346, 2.000000e+00 > %348 = fmul float %110, 2.000000e+00 > %349 = shl i32 %78, 4 > %350 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %349) > %351 = shl i32 %78, 4 > %352 = or i32 %351, 8 > %353 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %352) > %354 = fsub float -0.000000e+00, %202 > %355 = call float @llvm.fma.f32(float 
%350, float %353, float %354) > %356 = fmul float %355, %55 > %357 = fmul float %356, 2.000000e+00 > %358 = fmul float %54, %187 > %359 = fmul float %54, %183 > %360 = fmul float %55, %238 > %361 = fmul float %55, %234 > %362 = shl i32 %70, 5 > %363 = or i32 %362, 8 > %364 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %363) > %365 = fmul float %54, %364 > %366 = shl i32 %71, 5 > %367 = or i32 %366, 8 > %368 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %367) > %369 = fmul float %55, %368 > %370 = fadd float %357, %347 > %371 = fadd float %135, %348 > %372 = fadd float %360, %358 > %373 = fadd float %369, %365 > %374 = shl i32 %80, 4 > %375 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %374) > %376 = shl i32 %80, 4 > %377 = or i32 %376, 8 > %378 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %377) > %379 = fsub float -0.000000e+00, %286 > %380 = call float @llvm.fma.f32(float %375, float %378, float %379) > %381 = fmul float %380, %56 > %382 = fmul float %381, 2.000000e+00 > %383 = fmul float %56, %322 > %384 = fmul float %56, %318 > %385 = shl i32 %72, 5 > %386 = or i32 %385, 8 > %387 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %386) > %388 = fmul float %56, %387 > %389 = fadd float %370, %382 > %390 = fadd float %371, %270 > %391 = fadd float %372, %383 > %392 = fadd float %373, %388 > %393 = fmul float %389, %37 > %394 = fmul float %390, %38 > %395 = fadd float %393, %394 > %396 = fmul float %391, %39 > %397 = fadd float %395, %396 > %398 = fadd float %397, %392 > %399 = fmul float %389, %48 > %400 = fmul float %390, %49 > %401 = fadd float %400, %399 > %402 = fmul float %391, %50 > %403 = fadd float %401, %402 > %404 = shl i32 %70, 5 > %405 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %404) > %406 = fmul float %54, %405 > %407 = shl i32 %71, 5 > %408 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %407) > %409 = fmul float %55, %408 > %410 = shl i32 %72, 5 > %411 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %410) > %412 = fmul float %56, %411 > %413 = shl i32 %76, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %413) > %415 = shl i32 %76, 4 > %416 = or i32 %415, 4 > %417 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %416) > %418 = fsub float -0.000000e+00, %149 > %419 = call float @llvm.fma.f32(float %414, float %417, float %418) > %420 = fadd float %151, %150 > %421 = fmul float %419, %54 > %422 = fmul float %420, %54 > %423 = fmul float %421, 2.000000e+00 > %424 = fmul float %422, 2.000000e+00 > %425 = shl i32 %78, 4 > %426 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %425) > %427 = shl i32 %78, 4 > %428 = or i32 %427, 4 > %429 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %428) > %430 = fsub float -0.000000e+00, %200 > %431 = call float @llvm.fma.f32(float %426, float %429, float %430) > %432 = shl i32 %80, 4 > %433 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %432) > %434 = shl i32 %80, 4 > %435 = or i32 %434, 4 > %436 = call float @llvm.SI.load.const(<16 x i8> %33, i32 %435) > %437 = fsub float -0.000000e+00, %284 > %438 = call float @llvm.fma.f32(float %433, float %436, float %437) > %439 = fadd float %286, %285 > %440 = fmul float %431, %55 > %441 = fmul float %438, %56 > %442 = fmul float %439, %56 > %443 = fmul float %441, 2.000000e+00 > %444 = fmul float %442, 2.000000e+00 > %445 = fadd float %202, %201 > %446 = fmul float %445, %55 > %447 = fmul float %440, 2.000000e+00 > %448 = fmul float %446, 2.000000e+00 > %449 = fadd float %359, %361 > %450 = fadd float %423, %447 > %451 = fadd float 
%424, %448 > %452 = fadd float %406, %409 > %453 = fadd float %384, %449 > %454 = fadd float %443, %450 > %455 = fadd float %444, %451 > %456 = fadd float %412, %452 > %457 = fmul float %453, %37 > %458 = fmul float %454, %38 > %459 = fadd float %457, %458 > %460 = fmul float %455, %39 > %461 = fadd float %459, %460 > %462 = fadd float %461, %456 > %463 = fmul float %453, %48 > %464 = fmul float %454, %49 > %465 = fadd float %464, %463 > %466 = fmul float %455, %50 > %467 = fadd float %465, %466 > %468 = fmul float %20, %462 > %469 = fmul float %21, %333 > %470 = fadd float %468, %469 > %471 = fmul float %22, %398 > %472 = fadd float %470, %471 > %473 = fadd float %472, %23 > %474 = fmul float %24, %462 > %475 = fmul float %25, %333 > %476 = fadd float %474, %475 > %477 = fmul float %26, %398 > %478 = fadd float %476, %477 > %479 = fadd float %478, %27 > %480 = fmul float %28, %462 > %481 = fmul float %29, %333 > %482 = fadd float %480, %481 > %483 = fmul float %30, %398 > %484 = fadd float %482, %483 > %485 = fadd float %484, %31 > %486 = fmul float %20, %467 > %487 = fmul float %21, %338 > %488 = fadd float %487, %486 > %489 = fmul float %22, %403 > %490 = fadd float %488, %489 > %491 = fmul float %24, %467 > %492 = fmul float %25, %338 > %493 = fadd float %492, %491 > %494 = fmul float %26, %403 > %495 = fadd float %493, %494 > %496 = fmul float %28, %467 > %497 = fmul float %29, %338 > %498 = fadd float %497, %496 > %499 = fmul float %30, %403 > %500 = fadd float %498, %499 > %501 = bitcast i32 %11 to float > %502 = insertvalue <{ float, float, float }> undef, float %501, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %473, float %479, float %485, float %456) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %490, float %495, float %500, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %43, float %44, float %74, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %66, float 1.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %502 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..18] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 256, 208, 224} >IMM[1] FLT32 { 0.5000, 0.0000, 158456325028528675187087900672.0000, 1.0000} >IMM[2] UINT32 {240, 272, 1065353216, 288} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 2.5000, 0.9961, 0.0039, 256.0000} >IMM[5] FLT32 {65535.0000, 255.9961, 0.0039, 0.0000} > 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz > 1: RSQ TEMP[1].x, TEMP[0].xxxx > 2: MUL TEMP[0].xyz, TEMP[1].xxxx, IN[1].xyzz > 3: DP3 TEMP[0].x, 
TEMP[0].xyzz, -CONST[1][16].xyzz > 4: FMA TEMP[1].x, TEMP[0].xxxx, IMM[1].xxxx, IMM[1].xxxx > 5: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 6: ADD TEMP[2].xyz, IN[0].xyzz, -CONST[1][13].xyzz > 7: DP3 TEMP[3].x, TEMP[2].xyzz, CONST[1][16].xyzz > 8: MOV TEMP[3].z, TEMP[3].xxxx > 9: DP3 TEMP[3].x, TEMP[2].xyzz, CONST[1][14].xyzz > 10: DP3 TEMP[4].x, TEMP[2].xyzz, CONST[1][15].xyzz > 11: MOV TEMP[3].y, TEMP[4].xxxx > 12: FSEQ TEMP[4].xyz, CONST[1][17].xyzz, IMM[1].yyyy > 13: SSG TEMP[5].xyz, TEMP[3].xyzz > 14: MUL TEMP[5].xyz, IMM[1].zzzz, TEMP[5].xyzz > 15: RCP TEMP[6].x, CONST[1][17].xxxx > 16: RCP TEMP[6].y, CONST[1][17].yyyy > 17: RCP TEMP[6].z, CONST[1][17].zzzz > 18: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[6].xyzz > 19: UCMP TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xyzz, TEMP[6].xyzz > 20: ABS TEMP[5].xy, TEMP[4].yzzz > 21: FSGE TEMP[5].xy, IMM[1].wwww, TEMP[5].xyyy > 22: AND TEMP[5].xy, TEMP[5].xyyy, IMM[3].xxxx > 23: INEG TEMP[5].xy, TEMP[5].xyyy > 24: AND TEMP[5].xy, TEMP[5].xyyy, IMM[2].zzzz > 25: ABS TEMP[6].x, TEMP[4].xxxx > 26: MIN TEMP[6].x, TEMP[6].xxxx, IMM[1].wwww > 27: ADD TEMP[6].x, -TEMP[6].xxxx, IMM[1].wwww > 28: MUL TEMP[6].x, TEMP[5].xxxx, TEMP[6].xxxx > 29: MUL TEMP[5].x, TEMP[5].yyyy, TEMP[6].xxxx > 30: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[5].xxxx > 31: MOV TEMP[4].y, -CONST[1][17].yyyy > 32: FSNE TEMP[1].x, CONST[1][18].yyyy, IMM[1].yyyy > 33: UIF TEMP[1].xxxx :0 > 34: RCP TEMP[1].x, CONST[1][18].yyyy > 35: MUL TEMP[1].x, CONST[1][18].xxxx, TEMP[1].xxxx > 36: ELSE :0 > 37: SSG TEMP[5].x, CONST[1][18].xxxx > 38: MUL TEMP[1].x, IMM[1].zzzz, TEMP[5].xxxx > 39: ENDIF > 40: MUL TEMP[4].x, TEMP[1].xxxx, CONST[1][17].yyyy > 41: MUL TEMP[1].x, TEMP[1].xxxx, IMM[4].xxxx > 42: FSEQ TEMP[5].xy, TEMP[4].xyyy, IMM[1].yyyy > 43: SSG TEMP[6].xy, TEMP[3].xyyy > 44: MUL TEMP[6].xy, IMM[1].zzzz, TEMP[6].xyyy > 45: RCP TEMP[7].x, TEMP[4].xxxx > 46: RCP TEMP[7].y, TEMP[4].yyyy > 47: MUL TEMP[4].xy, TEMP[3].xyyy, TEMP[7].xyyy > 48: UCMP TEMP[3].xy, TEMP[5].xyyy, TEMP[6].xyyy, TEMP[4].xyyy > 49: FMA TEMP[3].xy, TEMP[3].xyyy, IMM[1].xxxx, IMM[1].xxxx > 50: MOV TEMP[4].xy, TEMP[3].xyyy > 51: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D > 52: MUL TEMP[3].xyz, TEMP[0].xxxx, TEMP[4].xyzz > 53: MOV TEMP[1].y, IMM[4].xxxx > 54: FSEQ TEMP[4].xy, TEMP[1].xyyy, IMM[1].yyyy > 55: SSG TEMP[5].xy, TEMP[2].xyyy > 56: MUL TEMP[5].xy, IMM[1].zzzz, TEMP[5].xyyy > 57: RCP TEMP[1].x, TEMP[1].xxxx > 58: RCP TEMP[1].y, IMM[4].xxxx > 59: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy > 60: UCMP TEMP[0].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[1].xyyy > 61: FMA TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx, IMM[1].xxxx > 62: MOV TEMP[1].xy, TEMP[0].xyyy > 63: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D > 64: MUL TEMP[0].x, TEMP[1].wwww, TEMP[3].zzzz > 65: MOV TEMP[1].xy, IN[2].xyyy > 66: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 67: DP2 TEMP[2].x, TEMP[1].zwww, IMM[4].yzzz > 68: MAX TEMP[1].xy, TEMP[3].yxxx, TEMP[1].xyyy > 69: MAX TEMP[2].x, TEMP[0].xxxx, TEMP[2].xxxx > 70: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 71: MUL TEMP[0].xy, TEMP[2].xxxx, IMM[5].xyyy > 72: FLR TEMP[2].x, TEMP[0].yyyy > 73: MOV TEMP[0].y, TEMP[2].xxxx > 74: FMA TEMP[0].x, -TEMP[2].xxxx, IMM[4].wwww, TEMP[0].xxxx > 75: MUL TEMP[0].xy, TEMP[0].yxxx, IMM[5].zzzz > 76: MOV TEMP[1].zw, TEMP[0].yyxy > 77: MOV OUT[0], TEMP[1] > 78: END >radeonsi: Compiling shader 46 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, 
float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %42 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 > %44 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 3 > %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 > %47 = extractelement <8 x i32> %43, i32 7 > %48 = extractelement <4 x i32> %46, i32 0 > %49 = and i32 %48, %47 > %50 = insertelement <4 x i32> %46, i32 %49, i32 0 > %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 > %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 7 > %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 > %56 = extractelement <8 x i32> %52, i32 7 > %57 = extractelement <4 x i32> %55, i32 0 > %58 = and i32 %57, %56 > %59 = insertelement <4 x i32> %55, i32 %58, i32 0 > %60 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = fmul float %63, %63 > %69 = fmul float %64, %64 > %70 = fadd float %69, %68 > %71 = fmul float %65, %65 > %72 = fadd float %70, %71 > %73 = call float @llvm.AMDGPU.rsq.clamped.f32(float 
%72) > %74 = fmul float %73, %63 > %75 = fmul float %73, %64 > %76 = fmul float %73, %65 > %77 = fmul float %34, %74 > %78 = fsub float -0.000000e+00, %77 > %79 = fmul float %35, %75 > %80 = fsub float %78, %79 > %81 = fmul float %36, %76 > %82 = fsub float %80, %81 > %83 = call float @llvm.fma.f32(float %82, float 5.000000e-01, float 5.000000e-01) > %84 = call float @llvm.AMDGPU.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) > %85 = fsub float %60, %25 > %86 = fsub float %61, %26 > %87 = fsub float %62, %27 > %88 = fmul float %85, %34 > %89 = fmul float %86, %35 > %90 = fadd float %89, %88 > %91 = fmul float %87, %36 > %92 = fadd float %90, %91 > %93 = fmul float %85, %28 > %94 = fmul float %86, %29 > %95 = fadd float %94, %93 > %96 = fmul float %87, %30 > %97 = fadd float %95, %96 > %98 = fmul float %85, %31 > %99 = fmul float %86, %32 > %100 = fadd float %99, %98 > %101 = fmul float %87, %33 > %102 = fadd float %100, %101 > %103 = fcmp oeq float %37, 0.000000e+00 > %104 = fcmp oeq float %38, 0.000000e+00 > %105 = fcmp oeq float %39, 0.000000e+00 > %106 = fcmp ogt float %97, 0.000000e+00 > %107 = select i1 %106, float 1.000000e+00, float %97 > %108 = fcmp oge float %107, 0.000000e+00 > %109 = fcmp ogt float %102, 0.000000e+00 > %110 = select i1 %109, float 1.000000e+00, float %102 > %111 = fcmp oge float %110, 0.000000e+00 > %112 = fcmp ogt float %92, 0.000000e+00 > %113 = select i1 %112, float 1.000000e+00, float %92 > %114 = fcmp oge float %113, 0.000000e+00 > %.op = fmul float %107, 0x4600000000000000 > %115 = select i1 %108, float %.op, float 0xC600000000000000 > %.op32 = fmul float %110, 0x4600000000000000 > %116 = select i1 %111, float %.op32, float 0xC600000000000000 > %.op33 = fmul float %113, 0x4600000000000000 > %117 = select i1 %114, float %.op33, float 0xC600000000000000 > %118 = fdiv float 1.000000e+00, %37 > %119 = fdiv float 1.000000e+00, %38 > %120 = fdiv float 1.000000e+00, %39 > %121 = fmul float %97, %118 > %122 = fmul float %102, %119 > %123 = fmul float %92, %120 > %124 = select i1 %103, float %115, float %121 > %125 = select i1 %104, float %116, float %122 > %126 = select i1 %105, float %117, float %123 > %127 = call float @llvm.fabs.f32(float %125) > %128 = call float @llvm.fabs.f32(float %126) > %129 = fcmp ole float %127, 1.000000e+00 > %130 = fcmp ole float %128, 1.000000e+00 > %131 = select i1 %129, float 1.000000e+00, float 0.000000e+00 > %132 = select i1 %130, float 1.000000e+00, float 0.000000e+00 > %133 = call float @llvm.fabs.f32(float %124) > %134 = call float @llvm.minnum.f32(float %133, float 1.000000e+00) > %135 = fsub float 1.000000e+00, %134 > %136 = fmul float %131, %135 > %137 = fmul float %132, %136 > %138 = fmul float %84, %137 > %139 = fsub float -0.000000e+00, %38 > %140 = fcmp une float %41, 0.000000e+00 > br i1 %140, label %IF, label %ELSE > >IF: ; preds = %main_body > %141 = fdiv float 1.000000e+00, %41 > %142 = fmul float %40, %141 > br label %ENDIF > >ELSE: ; preds = %main_body > %143 = fcmp ogt float %40, 0.000000e+00 > %144 = select i1 %143, float 1.000000e+00, float %40 > %145 = fcmp oge float %144, 0.000000e+00 > %.op34 = fmul float %144, 0x4600000000000000 > %146 = select i1 %145, float %.op34, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %142, %IF ], [ %146, %ELSE ] > %147 = fmul float %temp4.0, %38 > %148 = fmul float %temp4.0, 2.500000e+00 > %149 = fcmp oeq float %147, 0.000000e+00 > %150 = fcmp oeq float %38, -0.000000e+00 > %151 = fcmp ogt float %97, 0.000000e+00 > 
%152 = select i1 %151, float 1.000000e+00, float %97 > %153 = fcmp oge float %152, 0.000000e+00 > %154 = fcmp ogt float %102, 0.000000e+00 > %155 = select i1 %154, float 1.000000e+00, float %102 > %156 = fcmp oge float %155, 0.000000e+00 > %.op35 = fmul float %152, 0x4600000000000000 > %157 = select i1 %153, float %.op35, float 0xC600000000000000 > %.op36 = fmul float %155, 0x4600000000000000 > %158 = select i1 %156, float %.op36, float 0xC600000000000000 > %159 = fdiv float 1.000000e+00, %147 > %160 = fdiv float 1.000000e+00, %139 > %161 = fmul float %97, %159 > %162 = fmul float %102, %160 > %163 = select i1 %149, float %157, float %161 > %164 = select i1 %150, float %158, float %162 > %165 = call float @llvm.fma.f32(float %163, float 5.000000e-01, float 5.000000e-01) > %166 = call float @llvm.fma.f32(float %164, float 5.000000e-01, float 5.000000e-01) > %167 = bitcast float %165 to i32 > %168 = bitcast float %166 to i32 > %169 = insertelement <2 x i32> undef, i32 %167, i32 0 > %170 = insertelement <2 x i32> %169, i32 %168, i32 1 > %171 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %170, <8 x i32> %43, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %172 = extractelement <4 x float> %171, i32 0 > %173 = extractelement <4 x float> %171, i32 1 > %174 = extractelement <4 x float> %171, i32 2 > %175 = fmul float %138, %172 > %176 = fmul float %138, %173 > %177 = fmul float %138, %174 > %178 = fcmp oeq float %148, 0.000000e+00 > %179 = fcmp ogt float %85, 0.000000e+00 > %180 = select i1 %179, float 1.000000e+00, float %85 > %181 = fcmp oge float %180, 0.000000e+00 > %.op37 = fmul float %180, 0x4600000000000000 > %182 = select i1 %181, float %.op37, float 0xC600000000000000 > %183 = fdiv float 1.000000e+00, %148 > %184 = fmul float %85, %183 > %185 = fmul float %86, 0x3FD99999A0000000 > %186 = select i1 %178, float %182, float %184 > %187 = call float @llvm.fma.f32(float %186, float 5.000000e-01, float 5.000000e-01) > %188 = call float @llvm.fma.f32(float %185, float 5.000000e-01, float 5.000000e-01) > %189 = bitcast float %187 to i32 > %190 = bitcast float %188 to i32 > %191 = insertelement <2 x i32> undef, i32 %189, i32 0 > %192 = insertelement <2 x i32> %191, i32 %190, i32 1 > %193 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %192, <8 x i32> %43, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %194 = extractelement <4 x float> %193, i32 3 > %195 = fmul float %194, %177 > %196 = bitcast float %66 to i32 > %197 = bitcast float %67 to i32 > %198 = insertelement <2 x i32> undef, i32 %196, i32 0 > %199 = insertelement <2 x i32> %198, i32 %197, i32 1 > %200 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %199, <8 x i32> %52, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %201 = extractelement <4 x float> %200, i32 0 > %202 = extractelement <4 x float> %200, i32 1 > %203 = extractelement <4 x float> %200, i32 2 > %204 = extractelement <4 x float> %200, i32 3 > %205 = fmul float %203, 0x3FEFE01FE0000000 > %206 = fmul float %204, 0x3F6FE01FE0000000 > %207 = fadd float %205, %206 > %208 = call float @llvm.maxnum.f32(float %176, float %201) > %209 = call float @llvm.maxnum.f32(float %175, float %202) > %210 = call float @llvm.maxnum.f32(float %195, float %207) > %211 = call float @llvm.AMDGPU.clamp.(float %210, float 0.000000e+00, float 1.000000e+00) > %212 = fmul float %211, 6.553500e+04 > %213 = fmul float %211, 0x406FFFE000000000 > %214 = call float @llvm.floor.f32(float %213) > %215 = 
fsub float -0.000000e+00, %214 > %216 = call float @llvm.fma.f32(float %215, float 2.560000e+02, float %212) > %217 = fmul float %214, 0x3F70101020000000 > %218 = fmul float %216, 0x3F70101020000000 > %219 = bitcast float %5 to i32 > %220 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %219, 10 > %221 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %220, float %208, 11 > %222 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %221, float %209, 12 > %223 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %222, float %217, 13 > %224 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %223, float %218, 14 > %225 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %224, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %225 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.floor.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1037, 0.1718} >IMM[2] FLT32 { -6.3704, -2.4492, -4.4092, -0.6575} >IMM[3] FLT32 { 0.6575, 4.4092, 2.4492, 6.3704} >IMM[4] FLT32 { 0.1815, 0.0430, 0.0000, 0.0000} > 0: MOV TEMP[0].xz, IN[0].xxxx > 1: FSNE TEMP[1].x, CONST[1][5].yyyy, IMM[1].xxxx > 2: UIF TEMP[1].xxxx :0 > 3: RCP TEMP[1].x, 
CONST[1][5].yyyy > 4: ELSE :0 > 5: MOV TEMP[1].x, IMM[1].yyyy > 6: ENDIF > 7: FMA TEMP[2], TEMP[1].xxxx, IMM[2], IN[0].yyyy > 8: MOV TEMP[3].yw, TEMP[2].wyww > 9: FMA TEMP[1], TEMP[1].xxxx, IMM[3], IN[0].yyyy > 10: MOV TEMP[4].yw, TEMP[1].wyww > 11: MOV TEMP[0].yw, TEMP[2].zxzz > 12: MOV TEMP[2].xy, TEMP[0].zwww > 13: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 14: MUL TEMP[2], TEMP[2], IMM[1].zzzz > 15: MOV TEMP[3].xz, IN[0].xxxx > 16: MOV TEMP[5].xy, TEMP[3].zwww > 17: TEX TEMP[5], TEMP[5], SAMP[0], 2D > 18: MOV TEMP[6].xy, TEMP[3].xyyy > 19: TEX TEMP[6], TEMP[6], SAMP[0], 2D > 20: MOV TEMP[0].xy, TEMP[0].xyyy > 21: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 22: FMA TEMP[0], TEMP[0], IMM[4].yyyy, TEMP[2] > 23: FMA TEMP[0], TEMP[6], IMM[4].xxxx, TEMP[0] > 24: FMA TEMP[0], TEMP[5], IMM[1].wwww, TEMP[0] > 25: MOV TEMP[3].yw, TEMP[1].zxzz > 26: MOV TEMP[3].xz, IN[0].xxxx > 27: MOV TEMP[1].xy, TEMP[3].xyyy > 28: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 29: MOV TEMP[2].xy, TEMP[3].zwww > 30: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 31: FMA TEMP[0], TEMP[1], IMM[1].wwww, TEMP[0] > 32: FMA TEMP[0], TEMP[2], IMM[4].xxxx, TEMP[0] > 33: MOV TEMP[4].xz, IN[0].xxxx > 34: MOV TEMP[1].xy, TEMP[4].xyyy > 35: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 36: MOV TEMP[2].xy, TEMP[4].zwww > 37: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 38: FMA TEMP[0], TEMP[1], IMM[1].zzzz, TEMP[0] > 39: FMA TEMP[0], TEMP[2], IMM[4].yyyy, TEMP[0] > 40: MOV OUT[0], TEMP[0] > 41: END >radeonsi: Compiling shader 47 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %26 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 > %28 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %29 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %28, i64 0, i64 3 > %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 > %31 = extractelement <8 x i32> %27, i32 7 > %32 = extractelement <4 x i32> %30, i32 0 > %33 = and i32 %32, %31 > %34 = insertelement <4 x i32> %30, i32 %33, i32 0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = fcmp une float %25, 0.000000e+00 > %38 = fdiv float 1.000000e+00, %25 > %temp4.0 = select i1 %37, float %38, float 0x4600000000000000 > %39 = call float @llvm.fma.f32(float %temp4.0, float 0xC0197B4360000000, float %36) > %40 = call float @llvm.fma.f32(float %temp4.0, float 0xC00397DF20000000, float %36) > %41 = call float @llvm.fma.f32(float %temp4.0, float 0xC011A300A0000000, float %36) > %42 = call float @llvm.fma.f32(float %temp4.0, float 0xBFE50A87E0000000, float %36) > %43 = call float 
@llvm.fma.f32(float %temp4.0, float 0x3FE50A87E0000000, float %36) > %44 = call float @llvm.fma.f32(float %temp4.0, float 0x4011A300A0000000, float %36) > %45 = call float @llvm.fma.f32(float %temp4.0, float 0x400397DF20000000, float %36) > %46 = call float @llvm.fma.f32(float %temp4.0, float 0x40197B4360000000, float %36) > %47 = bitcast float %35 to i32 > %48 = bitcast float %41 to i32 > %49 = insertelement <2 x i32> undef, i32 %47, i32 0 > %50 = insertelement <2 x i32> %49, i32 %48, i32 1 > %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = fmul float %52, 0x3FBA8AA3E0000000 > %57 = fmul float %53, 0x3FBA8AA3E0000000 > %58 = fmul float %54, 0x3FBA8AA3E0000000 > %59 = fmul float %55, 0x3FBA8AA3E0000000 > %60 = bitcast float %35 to i32 > %61 = bitcast float %42 to i32 > %62 = insertelement <2 x i32> undef, i32 %60, i32 0 > %63 = insertelement <2 x i32> %62, i32 %61, i32 1 > %64 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %63, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = extractelement <4 x float> %64, i32 3 > %69 = bitcast float %35 to i32 > %70 = bitcast float %40 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = bitcast float %35 to i32 > %79 = bitcast float %39 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = extractelement <4 x float> %82, i32 3 > %87 = call float @llvm.fma.f32(float %83, float 0x3FA604E9A0000000, float %56) > %88 = call float @llvm.fma.f32(float %84, float 0x3FA604E9A0000000, float %57) > %89 = call float @llvm.fma.f32(float %85, float 0x3FA604E9A0000000, float %58) > %90 = call float @llvm.fma.f32(float %86, float 0x3FA604E9A0000000, float %59) > %91 = call float @llvm.fma.f32(float %74, float 0x3FC73AB6A0000000, float %87) > %92 = call float @llvm.fma.f32(float %75, float 0x3FC73AB6A0000000, float %88) > %93 = call float @llvm.fma.f32(float %76, float 0x3FC73AB6A0000000, float %89) > %94 = call float @llvm.fma.f32(float %77, float 0x3FC73AB6A0000000, float %90) > %95 = call float @llvm.fma.f32(float %65, float 0x3FC5FEBD20000000, float %91) > %96 = call float @llvm.fma.f32(float %66, float 0x3FC5FEBD20000000, float %92) > %97 = call float @llvm.fma.f32(float %67, float 0x3FC5FEBD20000000, float %93) > %98 = call float @llvm.fma.f32(float %68, float 0x3FC5FEBD20000000, float %94) > %99 = bitcast float %35 to i32 > %100 = 
bitcast float %43 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = extractelement <4 x float> %103, i32 3 > %108 = bitcast float %35 to i32 > %109 = bitcast float %45 to i32 > %110 = insertelement <2 x i32> undef, i32 %108, i32 0 > %111 = insertelement <2 x i32> %110, i32 %109, i32 1 > %112 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %111, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %113 = extractelement <4 x float> %112, i32 0 > %114 = extractelement <4 x float> %112, i32 1 > %115 = extractelement <4 x float> %112, i32 2 > %116 = extractelement <4 x float> %112, i32 3 > %117 = call float @llvm.fma.f32(float %104, float 0x3FC5FEBD20000000, float %95) > %118 = call float @llvm.fma.f32(float %105, float 0x3FC5FEBD20000000, float %96) > %119 = call float @llvm.fma.f32(float %106, float 0x3FC5FEBD20000000, float %97) > %120 = call float @llvm.fma.f32(float %107, float 0x3FC5FEBD20000000, float %98) > %121 = call float @llvm.fma.f32(float %113, float 0x3FC73AB6A0000000, float %117) > %122 = call float @llvm.fma.f32(float %114, float 0x3FC73AB6A0000000, float %118) > %123 = call float @llvm.fma.f32(float %115, float 0x3FC73AB6A0000000, float %119) > %124 = call float @llvm.fma.f32(float %116, float 0x3FC73AB6A0000000, float %120) > %125 = bitcast float %35 to i32 > %126 = bitcast float %44 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 0 > %131 = extractelement <4 x float> %129, i32 1 > %132 = extractelement <4 x float> %129, i32 2 > %133 = extractelement <4 x float> %129, i32 3 > %134 = bitcast float %35 to i32 > %135 = bitcast float %46 to i32 > %136 = insertelement <2 x i32> undef, i32 %134, i32 0 > %137 = insertelement <2 x i32> %136, i32 %135, i32 1 > %138 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %137, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %139 = extractelement <4 x float> %138, i32 0 > %140 = extractelement <4 x float> %138, i32 1 > %141 = extractelement <4 x float> %138, i32 2 > %142 = extractelement <4 x float> %138, i32 3 > %143 = call float @llvm.fma.f32(float %130, float 0x3FBA8AA3E0000000, float %121) > %144 = call float @llvm.fma.f32(float %131, float 0x3FBA8AA3E0000000, float %122) > %145 = call float @llvm.fma.f32(float %132, float 0x3FBA8AA3E0000000, float %123) > %146 = call float @llvm.fma.f32(float %133, float 0x3FBA8AA3E0000000, float %124) > %147 = call float @llvm.fma.f32(float %139, float 0x3FA604E9A0000000, float %143) > %148 = call float @llvm.fma.f32(float %140, float 0x3FA604E9A0000000, float %144) > %149 = call float @llvm.fma.f32(float %141, float 0x3FA604E9A0000000, float %145) > %150 = call float @llvm.fma.f32(float %142, float 0x3FA604E9A0000000, float %146) > %151 = bitcast float %5 to i32 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, 
float, float, float, float, float, float, float, float, float }> undef, i32 %151, 10 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %147, 11 > %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %148, 12 > %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float %149, 13 > %156 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155, float %150, 14 > %157 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %156, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %157 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..9] >DCL TEMP[0..4], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, -1.0000} >IMM[2] UINT32 {0, 144, 0, 0} >IMM[3] FLT32 {158456325028528675187087900672.0000, -0.4447, -1.3392, 1.3392} >IMM[4] FLT32 { -1.3392, 0.4447, 1.3392, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1], IN[0].xyxy, IMM[1], IMM[0].zzxx > 3: FMA TEMP[2], IN[0].xyxy, IMM[0].zwzw, IMM[0].zzzz > 4: FSEQ TEMP[3], CONST[1][9].xyxy, IMM[0].xxxx > 5: RCP TEMP[4].xz, CONST[1][9].xxxx > 6: RCP TEMP[4].yw, CONST[1][9].yyyy > 7: UCMP TEMP[3], TEMP[3], IMM[3].xxxx, TEMP[4] > 8: FMA TEMP[4], TEMP[3].zwzw, IMM[3].yzwy, TEMP[2].zwzw > 9: FMA TEMP[2], TEMP[3], IMM[4].xyyz, TEMP[2] > 10: MOV OUT[3], TEMP[2] > 11: MOV OUT[2], TEMP[4] > 12: MOV OUT[1], TEMP[1] > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 48 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, 
<16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 144) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 148) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %24 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = call float @llvm.fma.f32(float %21, float 1.000000e+00, float 0.000000e+00) > %26 = call float @llvm.fma.f32(float %22, float -1.000000e+00, float 0.000000e+00) > %27 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %28 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %29 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %30 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %31 = fcmp oeq float %16, 0.000000e+00 > %32 = fcmp oeq float %17, 0.000000e+00 > %33 = fcmp oeq float %16, 0.000000e+00 > %34 = fcmp oeq float %17, 0.000000e+00 > %35 = fdiv float 1.000000e+00, %16 > %36 = fdiv float 1.000000e+00, %17 > %37 = select i1 %31, float 0x4600000000000000, float %35 > %38 = select i1 %32, float 0x4600000000000000, float %36 > %39 = select i1 %33, float 0x4600000000000000, float %35 > %40 = select i1 %34, float 0x4600000000000000, float %36 > %41 = call float @llvm.fma.f32(float %39, float 0xBFDC758180000000, float %29) > %42 = call float @llvm.fma.f32(float %40, float 0xBFF56D8B20000000, float %30) > %43 = call float @llvm.fma.f32(float %39, float 0x3FF56D8B20000000, float %29) > %44 = call float @llvm.fma.f32(float %40, float 0xBFDC758180000000, float %30) > %45 = call float @llvm.fma.f32(float %37, float 0xBFF56D8B20000000, float %27) > %46 = call float @llvm.fma.f32(float %38, float 0x3FDC758180000000, float %28) > %47 = call float @llvm.fma.f32(float %39, float 0x3FDC758180000000, float %29) > %48 = call float @llvm.fma.f32(float %40, float 0x3FF56D8B20000000, float %30) > %49 = bitcast i32 %11 to float > %50 = insertvalue <{ float, float, float }> undef, float %49, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %45, float %46, float %47, float %48) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %50 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > 
epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 112, 160, 128} >IMM[1] FLT32 { 0.0000, 1.0000, 0.2243, 0.1028} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] FLT32 { -0.6667, 0.6667, 0.4000, 0.1000} >IMM[4] FLT32 { 1.0000, 0.0000, -0.6000, -0.4000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: ADD TEMP[0].x, TEMP[0].xxxx, -CONST[1][7].xxxx > 3: MUL TEMP[1].x, TEMP[0].xxxx, CONST[1][7].yyyy > 4: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 5: MOV TEMP[0].x, TEMP[1].xxxx > 6: MUL TEMP[2].x, TEMP[1].xxxx, CONST[1][7].zzzz > 7: MOV TEMP[0].y, TEMP[2].xxxx > 8: USNE TEMP[2].x, CONST[1][10].xxxx, IMM[0].xxxx > 9: UIF TEMP[2].xxxx :0 > 10: MOV TEMP[2].xy, IN[0].xyyy > 11: MOV TEMP[2].w, IMM[1].xxxx > 12: TXL TEMP[2].x, TEMP[2], SAMP[1], 2D > 13: FSLT TEMP[3].x, TEMP[2].xxxx, IMM[1].yyyy > 14: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx > 15: INEG TEMP[3].x, TEMP[3].xxxx > 16: USNE TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx > 17: UIF TEMP[3].xxxx :0 > 18: MOV TEMP[3].xy, IN[0].xyyy > 19: MOV TEMP[3].w, IMM[1].xxxx > 20: TXL TEMP[3].x, TEMP[3], SAMP[2], 2D > 21: ADD TEMP[3].x, TEMP[3].xxxx, -CONST[1][7].xxxx > 22: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][7].yyyy > 23: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 24: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][7].zzzz > 25: FMA TEMP[0].x, TEMP[1].xxxx, CONST[1][7].zzzz, -TEMP[3].xxxx > 26: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[0].xxxx, TEMP[3].xxxx > 27: MOV TEMP[0].y, TEMP[1].xxxx > 28: ENDIF > 29: ENDIF > 30: MOV TEMP[1].xy, IN[1].xyyy > 31: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D > 32: MOV TEMP[2].xy, IN[1].zwww > 33: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D > 34: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz > 35: MOV TEMP[2].xy, IN[2].xyyy > 36: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D > 37: ADD TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz > 38: MOV TEMP[2].xy, IN[2].zwww > 39: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D > 40: ADD TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz > 41: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[1].zzzz > 42: MOV TEMP[2].xy, IN[0].xyyy > 43: TEX TEMP[2].xyz, TEMP[2], SAMP[4], 2D > 44: MOV TEMP[3].xy, IN[0].xyyy > 45: TEX TEMP[3], TEMP[3], SAMP[5], 2D > 46: MUL TEMP[4].x, TEMP[3].wwww, CONST[1][8].zzzz > 47: MAX TEMP[4].x, TEMP[0].yyyy, TEMP[4].xxxx > 48: FMA TEMP[4], TEMP[4].xxxx, IMM[3], IMM[4] > 49: MOV_SAT TEMP[4], TEMP[4] > 50: ADD TEMP[5].xy, -TEMP[4].zwww, TEMP[4].yzzz > 51: FMA TEMP[6].x, TEMP[5].xxxx, IMM[1].wwww, TEMP[4].xxxx > 52: MOV TEMP[6].w, TEMP[6].xxxx > 53: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].yyyy > 54: FMA TEMP[0].xyz, TEMP[1].xyzz, TEMP[5].xxxx, TEMP[2].xyzz > 55: FMA TEMP[6].xyz, TEMP[3].xyzz, TEMP[4].wwww, TEMP[0].xyzz > 56: MOV OUT[0], TEMP[6] > 57: END >radeonsi: Compiling shader 49 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 
x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 136) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) > %30 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 > %32 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %33 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %32, i64 0, i64 3 > %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 > %35 = extractelement <8 x i32> %31, i32 7 > %36 = extractelement <4 x i32> %34, i32 0 > %37 = and i32 %36, %35 > %38 = insertelement <4 x i32> %34, i32 %37, i32 0 > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 7 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 > %46 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %47 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %46, i64 0, i64 11 > %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 > %49 = extractelement <8 x i32> %45, i32 7 > %50 = extractelement <4 x i32> %48, i32 0 > %51 = and i32 %50, %49 > %52 = insertelement <4 x i32> %48, i32 %51, i32 0 > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 15 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 19 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa 
!0 > %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 23 > %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 > %76 = extractelement <8 x i32> %72, i32 7 > %77 = extractelement <4 x i32> %75, i32 0 > %78 = and i32 %77, %76 > %79 = insertelement <4 x i32> %75, i32 %78, i32 0 > %80 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %90 = bitcast float %80 to i32 > %91 = bitcast float %81 to i32 > %92 = insertelement <2 x i32> undef, i32 %90, i32 0 > %93 = insertelement <2 x i32> %92, i32 %91, i32 1 > %94 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %93, <8 x i32> %31, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %95 = extractelement <4 x float> %94, i32 0 > %96 = fsub float %95, %25 > %97 = fmul float %96, %26 > %98 = call float @llvm.AMDGPU.clamp.(float %97, float 0.000000e+00, float 1.000000e+00) > %99 = fmul float %98, %27 > %100 = bitcast float %29 to i32 > %101 = icmp eq i32 %100, 0 > br i1 %101, label %ENDIF, label %IF > >IF: ; preds = %main_body > %102 = extractelement <4 x i32> %43, i32 0 > %103 = extractelement <8 x i32> %40, i32 7 > %104 = and i32 %102, %103 > %105 = insertelement <4 x i32> %43, i32 %104, i32 0 > %106 = bitcast float %80 to i32 > %107 = bitcast float %81 to i32 > %108 = insertelement <4 x i32> undef, i32 %106, i32 0 > %109 = insertelement <4 x i32> %108, i32 %107, i32 1 > %110 = insertelement <4 x i32> %109, i32 0, i32 2 > %111 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %110, <8 x i32> %40, <4 x i32> %105, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %112 = extractelement <4 x float> %111, i32 0 > %113 = fcmp olt float %112, 1.000000e+00 > br i1 %113, label %IF29, label %ENDIF > >ENDIF: ; preds = %main_body, %IF29, %IF > %temp1.0 = phi float [ %99, %main_body ], [ %217, %IF29 ], [ %99, %IF ] > %114 = bitcast float %82 to i32 > %115 = bitcast float %83 to i32 > %116 = insertelement <2 x i32> undef, i32 %114, i32 0 > %117 = insertelement <2 x i32> %116, i32 %115, i32 1 > %118 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %117, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %119 = extractelement <4 x float> %118, i32 0 > %120 = extractelement <4 x float> %118, i32 1 > %121 = extractelement <4 x float> %118, i32 2 > %122 = bitcast float %84 to i32 > %123 = bitcast float %85 to i32 > %124 = insertelement <2 x i32> undef, i32 %122, i32 0 > %125 = insertelement <2 x i32> %124, i32 %123, i32 1 > %126 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %125, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %127 = extractelement <4 x float> %126, i32 0 > %128 = extractelement <4 x float> %126, i32 1 > 
%129 = extractelement <4 x float> %126, i32 2 > %130 = fadd float %119, %127 > %131 = fadd float %120, %128 > %132 = fadd float %121, %129 > %133 = bitcast float %86 to i32 > %134 = bitcast float %87 to i32 > %135 = insertelement <2 x i32> undef, i32 %133, i32 0 > %136 = insertelement <2 x i32> %135, i32 %134, i32 1 > %137 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %136, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %138 = extractelement <4 x float> %137, i32 0 > %139 = extractelement <4 x float> %137, i32 1 > %140 = extractelement <4 x float> %137, i32 2 > %141 = fadd float %138, %130 > %142 = fadd float %139, %131 > %143 = fadd float %140, %132 > %144 = bitcast float %88 to i32 > %145 = bitcast float %89 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 0 > %150 = extractelement <4 x float> %148, i32 1 > %151 = extractelement <4 x float> %148, i32 2 > %152 = fadd float %149, %141 > %153 = fadd float %150, %142 > %154 = fadd float %151, %143 > %155 = fmul float %152, 0x3FCCB5F600000000 > %156 = fmul float %153, 0x3FCCB5F600000000 > %157 = fmul float %154, 0x3FCCB5F600000000 > %158 = bitcast float %80 to i32 > %159 = bitcast float %81 to i32 > %160 = insertelement <2 x i32> undef, i32 %158, i32 0 > %161 = insertelement <2 x i32> %160, i32 %159, i32 1 > %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %163 = extractelement <4 x float> %162, i32 0 > %164 = extractelement <4 x float> %162, i32 1 > %165 = extractelement <4 x float> %162, i32 2 > %166 = bitcast float %80 to i32 > %167 = bitcast float %81 to i32 > %168 = insertelement <2 x i32> undef, i32 %166, i32 0 > %169 = insertelement <2 x i32> %168, i32 %167, i32 1 > %170 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %169, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %171 = extractelement <4 x float> %170, i32 0 > %172 = extractelement <4 x float> %170, i32 1 > %173 = extractelement <4 x float> %170, i32 2 > %174 = extractelement <4 x float> %170, i32 3 > %175 = fmul float %174, %28 > %176 = call float @llvm.maxnum.f32(float %temp1.0, float %175) > %177 = call float @llvm.fma.f32(float %176, float 0xBFE5555560000000, float 1.000000e+00) > %178 = call float @llvm.fma.f32(float %176, float 0x3FE5555560000000, float 0.000000e+00) > %179 = call float @llvm.fma.f32(float %176, float 0x3FD99999A0000000, float 0xBFE3333340000000) > %180 = call float @llvm.fma.f32(float %176, float 0x3FB99999A0000000, float 0xBFD99999A0000000) > %181 = call float @llvm.AMDGPU.clamp.(float %177, float 0.000000e+00, float 1.000000e+00) > %182 = call float @llvm.AMDGPU.clamp.(float %178, float 0.000000e+00, float 1.000000e+00) > %183 = call float @llvm.AMDGPU.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) > %184 = call float @llvm.AMDGPU.clamp.(float %180, float 0.000000e+00, float 1.000000e+00) > %185 = fsub float %182, %183 > %186 = fsub float %183, %184 > %187 = call float @llvm.fma.f32(float %185, float 0x3FBA503FC0000000, float %181) > %188 = fmul float %163, %186 > %189 = fmul float %164, %186 > %190 = fmul float %165, %186 > %191 = call float @llvm.fma.f32(float 
%155, float %185, float %188) > %192 = call float @llvm.fma.f32(float %156, float %185, float %189) > %193 = call float @llvm.fma.f32(float %157, float %185, float %190) > %194 = call float @llvm.fma.f32(float %171, float %184, float %191) > %195 = call float @llvm.fma.f32(float %172, float %184, float %192) > %196 = call float @llvm.fma.f32(float %173, float %184, float %193) > %197 = bitcast float %5 to i32 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %197, 10 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %194, 11 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %195, 12 > %201 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200, float %196, 13 > %202 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %201, float %187, 14 > %203 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %202, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %203 > >IF29: ; preds = %IF > %204 = bitcast float %80 to i32 > %205 = bitcast float %81 to i32 > %206 = insertelement <4 x i32> undef, i32 %204, i32 0 > %207 = insertelement <4 x i32> %206, i32 %205, i32 1 > %208 = insertelement <4 x i32> %207, i32 0, i32 2 > %209 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %208, <8 x i32> %45, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %210 = extractelement <4 x float> %209, i32 0 > %211 = fsub float %210, %25 > %212 = fmul float %211, %26 > %213 = call float @llvm.AMDGPU.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) > %214 = fmul float %213, %27 > %215 = fsub float -0.000000e+00, %214 > %216 = call float @llvm.fma.f32(float %98, float %27, float %215) > %217 = call float @llvm.fma.f32(float %112, float %216, float %214) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = 
!{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..8] >DCL TEMP[0..4], LOCAL >IMM[0] FLT32 { 0.2243, 0.1028, 0.0000, -0.6000} >IMM[1] UINT32 {0, 96, 112, 128} >IMM[2] FLT32 { -0.6667, 0.6667, 0.4000, 0.1000} >IMM[3] FLT32 { 1.0000, 0.0000, -0.6000, -0.4000} > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 2: MOV TEMP[1].xy, IN[1].zwww > 3: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D > 4: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 5: MOV TEMP[1].xy, IN[2].xyyy > 6: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D > 7: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 8: MOV TEMP[1].xy, IN[2].zwww > 9: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D > 10: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 11: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx > 12: DP2 TEMP[2].x, IN[0].zwww, IN[0].zwww > 13: SQRT TEMP[2].x, TEMP[2].xxxx > 14: ADD TEMP[2].x, TEMP[2].xxxx, -CONST[1][6].xxxx > 15: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][6].yyyy > 16: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 17: LG2 TEMP[2].x, TEMP[2].xxxx > 18: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][6].wwww > 19: EX2 TEMP[2].x, TEMP[2].xxxx > 20: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][6].zzzz > 21: MOV TEMP[3].xy, IN[0].xyyy > 22: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D > 23: ADD TEMP[1].x, TEMP[3].xxxx, -CONST[1][7].xxxx > 24: MUL TEMP[3].x, TEMP[1].xxxx, CONST[1][7].yyyy > 25: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 26: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][7].zzzz > 27: MOV TEMP[3].xy, IN[0].xyyy > 28: TEX TEMP[3], TEMP[3], SAMP[2], 2D > 29: MUL TEMP[4].x, TEMP[3].wwww, CONST[1][8].zzzz > 30: MAX TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx > 31: MAX TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx > 32: FMA TEMP[1], TEMP[1].xxxx, IMM[2], IMM[3] > 33: MOV_SAT TEMP[1], TEMP[1] > 34: ADD TEMP[2].xy, -TEMP[1].zwww, TEMP[1].yzzz > 35: MOV TEMP[4].xy, IN[0].xyyy > 36: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D > 37: MUL TEMP[4].xyz, TEMP[2].yyyy, TEMP[4].xyzz > 38: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx, TEMP[4].xyzz > 39: FMA TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy, TEMP[1].xxxx > 40: MOV TEMP[2].w, TEMP[2].xxxx > 41: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[1].wwww, TEMP[0].xyzz > 42: MOV OUT[0], TEMP[2] > 43: END >radeonsi: Compiling shader 50 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, 
align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 136) > %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !tbaa !0 > %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 3 > %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 > %38 = extractelement <8 x i32> %34, i32 7 > %39 = extractelement <4 x i32> %37, i32 0 > %40 = and i32 %39, %38 > %41 = insertelement <4 x i32> %37, i32 %40, i32 0 > %42 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 > %44 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 7 > %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 > %47 = extractelement <8 x i32> %43, i32 7 > %48 = extractelement <4 x i32> %46, i32 0 > %49 = and i32 %48, %47 > %50 = insertelement <4 x i32> %46, i32 %49, i32 0 > %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 > %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 11 > %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 > %56 = extractelement <8 x i32> %52, i32 7 > %57 = extractelement <4 x i32> %55, i32 0 > %58 = and i32 %57, %56 > %59 = insertelement <4 x i32> %55, i32 %58, i32 0 > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 15 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %79 = call float 
@llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %81 = bitcast float %73 to i32 > %82 = bitcast float %74 to i32 > %83 = insertelement <2 x i32> undef, i32 %81, i32 0 > %84 = insertelement <2 x i32> %83, i32 %82, i32 1 > %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = bitcast float %75 to i32 > %90 = bitcast float %76 to i32 > %91 = insertelement <2 x i32> undef, i32 %89, i32 0 > %92 = insertelement <2 x i32> %91, i32 %90, i32 1 > %93 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %92, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = fadd float %86, %94 > %98 = fadd float %87, %95 > %99 = fadd float %88, %96 > %100 = bitcast float %77 to i32 > %101 = bitcast float %78 to i32 > %102 = insertelement <2 x i32> undef, i32 %100, i32 0 > %103 = insertelement <2 x i32> %102, i32 %101, i32 1 > %104 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %103, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %105 = extractelement <4 x float> %104, i32 0 > %106 = extractelement <4 x float> %104, i32 1 > %107 = extractelement <4 x float> %104, i32 2 > %108 = fadd float %97, %105 > %109 = fadd float %98, %106 > %110 = fadd float %99, %107 > %111 = bitcast float %79 to i32 > %112 = bitcast float %80 to i32 > %113 = insertelement <2 x i32> undef, i32 %111, i32 0 > %114 = insertelement <2 x i32> %113, i32 %112, i32 1 > %115 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %114, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %116 = extractelement <4 x float> %115, i32 0 > %117 = extractelement <4 x float> %115, i32 1 > %118 = extractelement <4 x float> %115, i32 2 > %119 = fadd float %108, %116 > %120 = fadd float %109, %117 > %121 = fadd float %110, %118 > %122 = fmul float %119, 0x3FCCB5F600000000 > %123 = fmul float %120, 0x3FCCB5F600000000 > %124 = fmul float %121, 0x3FCCB5F600000000 > %125 = fmul float %71, %71 > %126 = fmul float %72, %72 > %127 = fadd float %125, %126 > %128 = call float @llvm.sqrt.f32(float %127) > %129 = fsub float %128, %25 > %130 = fmul float %129, %26 > %131 = call float @llvm.AMDGPU.clamp.(float %130, float 0.000000e+00, float 1.000000e+00) > %132 = call float @llvm.log2.f32(float %131) > %133 = fmul float %132, %28 > %134 = call float @llvm.exp2.f32(float %133) > %135 = fmul float %134, %27 > %136 = bitcast float %69 to i32 > %137 = bitcast float %70 to i32 > %138 = insertelement <2 x i32> undef, i32 %136, i32 0 > %139 = insertelement <2 x i32> %138, i32 %137, i32 1 > %140 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %139, <8 x i32> %43, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 0 > %142 = fsub float %141, %29 > %143 = fmul float %142, %30 > %144 = call float @llvm.AMDGPU.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) > %145 = fmul float %144, %31 > %146 = bitcast float %69 to i32 > %147 = bitcast float %70 to i32 > %148 = insertelement <2 x i32> undef, i32 %146, i32 0 > 
%149 = insertelement <2 x i32> %148, i32 %147, i32 1 > %150 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %149, <8 x i32> %52, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %151 = extractelement <4 x float> %150, i32 0 > %152 = extractelement <4 x float> %150, i32 1 > %153 = extractelement <4 x float> %150, i32 2 > %154 = extractelement <4 x float> %150, i32 3 > %155 = fmul float %154, %32 > %156 = call float @llvm.maxnum.f32(float %155, float %145) > %157 = call float @llvm.maxnum.f32(float %135, float %156) > %158 = call float @llvm.fma.f32(float %157, float 0xBFE5555560000000, float 1.000000e+00) > %159 = call float @llvm.fma.f32(float %157, float 0x3FE5555560000000, float 0.000000e+00) > %160 = call float @llvm.fma.f32(float %157, float 0x3FD99999A0000000, float 0xBFE3333340000000) > %161 = call float @llvm.fma.f32(float %157, float 0x3FB99999A0000000, float 0xBFD99999A0000000) > %162 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %163 = call float @llvm.AMDGPU.clamp.(float %159, float 0.000000e+00, float 1.000000e+00) > %164 = call float @llvm.AMDGPU.clamp.(float %160, float 0.000000e+00, float 1.000000e+00) > %165 = call float @llvm.AMDGPU.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) > %166 = fsub float %163, %164 > %167 = fsub float %164, %165 > %168 = bitcast float %69 to i32 > %169 = bitcast float %70 to i32 > %170 = insertelement <2 x i32> undef, i32 %168, i32 0 > %171 = insertelement <2 x i32> %170, i32 %169, i32 1 > %172 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %171, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %173 = extractelement <4 x float> %172, i32 0 > %174 = extractelement <4 x float> %172, i32 1 > %175 = extractelement <4 x float> %172, i32 2 > %176 = fmul float %167, %173 > %177 = fmul float %167, %174 > %178 = fmul float %167, %175 > %179 = call float @llvm.fma.f32(float %122, float %166, float %176) > %180 = call float @llvm.fma.f32(float %123, float %166, float %177) > %181 = call float @llvm.fma.f32(float %124, float %166, float %178) > %182 = call float @llvm.fma.f32(float %166, float 0x3FBA503FC0000000, float %162) > %183 = call float @llvm.fma.f32(float %151, float %165, float %179) > %184 = call float @llvm.fma.f32(float %152, float %165, float %180) > %185 = call float @llvm.fma.f32(float %153, float %165, float %181) > %186 = bitcast float %5 to i32 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %186, 10 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float %183, 11 > %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float %184, 12 > %190 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189, float %185, 13 > %191 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %190, float %182, 14 > %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %191, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..11] >DCL CONST[2][0..25] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.5000} >IMM[1] UINT32 {1, 384, 336, 400} >IMM[2] UINT32 {352, 64, 80, 96} >IMM[3] UINT32 {0, 176, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, IMM[0].yzzz, CONST[2][24].xyyy > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[2][24].xyyy > 4: MOV TEMP[2].xy, TEMP[1].xyxx > 5: FMA TEMP[3].xy, TEMP[1].xyyy, CONST[2][21].zwww, CONST[2][21].xyyy > 6: MOV TEMP[2].zw, IMM[0].yyxy > 7: ADD TEMP[1].xy, IN[0].xyyy, IMM[0].yyyy > 8: MOV TEMP[4].w, CONST[2][21].zzzw > 9: MUL TEMP[1].x, TEMP[1].xxxx, CONST[2][21].zzzz > 10: MOV TEMP[4].z, IMM[0].wwww > 11: FMA TEMP[5].x, -TEMP[1].yyyy, IMM[0].wwww, IMM[0].yyyy > 12: MOV TEMP[1].z, TEMP[5].xxxx > 13: FMA TEMP[5].xy, TEMP[1].xzzz, TEMP[4].zwww, CONST[2][21].xyyy > 14: MOV TEMP[4].z, CONST[2][25].yyyy > 15: MOV TEMP[4].w, IMM[0].yyyy > 16: DP2 TEMP[6].x, TEMP[4].zwww, CONST[2][22].xyyy > 17: MUL TEMP[4].xy, TEMP[5].xyyy, TEMP[6].xxxx > 18: DP4 TEMP[1].x, CONST[2][4], TEMP[4] > 19: DP4 TEMP[5].x, CONST[2][5], TEMP[4] > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP4 TEMP[4].x, CONST[2][6], TEMP[4] > 22: MOV TEMP[1].z, TEMP[4].xxxx > 23: ADD TEMP[1].xyz, -TEMP[1].xyzz, CONST[1][11].xyzz > 24: MOV TEMP[1].xyz, -TEMP[1].xyzx > 25: MOV OUT[3], TEMP[1] > 26: MOV OUT[2], TEMP[3] > 27: MOV OUT[1], TEMP[2] > 28: MOV OUT[0], TEMP[0] > 29: END >radeonsi: Compiling shader 51 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] 
addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 64) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 68) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 72) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 76) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 88) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 92) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 96) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 100) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 104) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 108) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 336) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 340) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 344) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 348) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 352) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 356) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 384) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 388) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 404) > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %13) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = fsub float -0.000000e+00, %40 > %48 = call float @llvm.fma.f32(float %45, float %39, float %39) > %49 = call float @llvm.fma.f32(float %46, float %47, float %40) > %50 = call float @llvm.fma.f32(float %48, float %35, float %33) > %51 = call float @llvm.fma.f32(float %49, float %36, float %34) > %52 = fadd float %45, 1.000000e+00 > %53 = fadd float %46, 1.000000e+00 > %54 = fmul float %52, %35 > %55 = fsub float -0.000000e+00, %53 > %56 = call float @llvm.fma.f32(float %55, float 5.000000e-01, float 1.000000e+00) > %57 = call float @llvm.fma.f32(float %54, float 5.000000e-01, float %33) > %58 = call float @llvm.fma.f32(float %56, float %36, float %34) > %59 = fmul float %41, %37 > %60 = fadd float %59, %38 > %61 = fmul float %57, %60 > %62 = fmul float %58, %60 > %63 = fmul float %21, %61 > %64 = fmul float %22, %62 > %65 = fadd float %63, %64 > %66 = fmul float %23, %41 > %67 = fadd float %65, %66 > %68 = fadd float %67, %24 > %69 = fmul float %25, %61 > %70 = fmul float %26, %62 > %71 = fadd float %69, %70 > %72 = fmul float %27, %41 > %73 = fadd float %71, %72 > %74 = fadd float %73, %28 > %75 = fmul float %29, %61 > %76 = fmul float %30, %62 > %77 = fadd float %75, %76 > %78 = fmul float %31, %41 > %79 = fadd float %77, %78 > %80 = fadd float %79, %32 > %81 = fsub float %16, %68 > %82 = fsub float %17, %74 > %83 = fsub float %18, %80 > %84 = fsub float -0.000000e+00, %81 > %85 = fsub float -0.000000e+00, %82 > %86 = fsub 
float -0.000000e+00, %83 > %87 = bitcast i32 %11 to float > %88 = insertvalue <{ float, float, float }> undef, float %87, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %48, float %49, float 0.000000e+00, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %50, float %51, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %84, float %85, float %86, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %45, float %46, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %88 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 3D, FLOAT >DCL CONST[1][0..20] >DCL CONST[2][0..68] >DCL TEMP[0..14], LOCAL >IMM[0] FLT32 {637100032.0000, 0.0100, 0.5000, 1.0000} >IMM[1] UINT32 {0, 224, 1, 176} >IMM[2] UINT32 {288, 80, 320, 256} >IMM[3] FLT32 { 0.0000, 158456325028528675187087900672.0000, 10000000.0000, 0.0597} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 { -1.5000, 1.4427, -0.1500, 0.0000} >IMM[6] UINT32 {240, 304, 272, 1088} > 0: ADD TEMP[0].x, IMM[0].xxxx, CONST[1][14].wwww > 1: ADD TEMP[1].x, CONST[2][11].yyyy, CONST[1][18].wwww > 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx > 3: MOV TEMP[2].y, -TEMP[2].xxxx > 4: MOV TEMP[2].xz, -CONST[2][11].xxzx > 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz > 6: DP3 TEMP[4].x, IN[2].xyzz, IN[2].xyzz > 7: RSQ TEMP[4].x, TEMP[4].xxxx > 8: MUL TEMP[4].xyz, TEMP[4].xxxx, IN[2].yxzz > 9: DP3 TEMP[5].x, TEMP[2].yxzz, TEMP[4].xyzz > 10: FMA TEMP[3].x, -TEMP[5].xxxx, TEMP[5].xxxx, TEMP[3].xxxx > 11: FMA TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[3].xxxx > 12: SQRT TEMP[3].x, TEMP[0].xxxx > 13: ADD TEMP[0].x, TEMP[3].xxxx, TEMP[5].xxxx > 14: MAX TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy > 15: MUL TEMP[5].x, TEMP[0].xxxx, TEMP[3].xxxx > 16: FMA TEMP[2].x, TEMP[5].xxxx, IMM[0].zzzz, TEMP[1].xxxx > 17: MOV TEMP[1].w, IMM[0].wwww > 18: FSEQ TEMP[5].xy, IN[0].wwww, IMM[3].xxxx > 19: SSG TEMP[6].xy, IN[0].xyyy > 20: MUL TEMP[6].xy, IMM[3].yyyy, TEMP[6].xyyy > 21: RCP TEMP[7].xy, IN[0].wwww > 22: MUL TEMP[7].xy, IN[0].xyyy, TEMP[7].xyyy > 23: UCMP TEMP[5].xy, TEMP[5].xyyy, TEMP[6].xyyy, TEMP[7].xyyy > 24: MOV TEMP[6].xy, TEMP[5].xyyy > 25: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D > 26: MOV TEMP[1].x, TEMP[6].xxxx > 27: MOV TEMP[5].xy, TEMP[5].xyyy > 28: TEX TEMP[5].xyz, TEMP[5], SAMP[1], 2D > 29: FSEQ TEMP[7].xy, IN[0].wwww, IMM[3].xxxx > 30: SSG TEMP[8].xy, IN[1].xyyy > 31: MUL TEMP[8].xy, IMM[3].yyyy, TEMP[8].xyyy > 32: RCP TEMP[9].xy, IN[0].wwww > 33: MUL TEMP[9].xy, IN[1].xyyy, TEMP[9].xyyy > 34: UCMP 
TEMP[7].xy, TEMP[7].xyyy, TEMP[8].xyyy, TEMP[9].xyyy > 35: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[6].xxxx > 36: MOV TEMP[1].yz, TEMP[7].yxyy > 37: FSEQ TEMP[7].x, TEMP[6].xxxx, IMM[3].zzzz > 38: AND TEMP[7].x, TEMP[7].xxxx, IMM[4].xxxx > 39: INEG TEMP[7].x, TEMP[7].xxxx > 40: DP4 TEMP[8].x, CONST[1][5].zxyw, TEMP[1] > 41: USNE TEMP[9].x, TEMP[7].xxxx, IMM[1].xxxx > 42: UIF TEMP[9].xxxx :0 > 43: MOV TEMP[9].x, TEMP[0].xxxx > 44: ELSE :0 > 45: MOV TEMP[9].x, TEMP[6].xxxx > 46: ENDIF > 47: ADD TEMP[1].x, TEMP[8].xxxx, CONST[1][18].wwww > 48: DP3 TEMP[6].x, TEMP[4].yxzz, CONST[1][20].xyzz > 49: FMA TEMP[8].x, -CONST[1][16].yyyy, TEMP[6].xxxx, CONST[1][16].xxxx > 50: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx, IMM[0].wwww > 51: MUL TEMP[6].x, TEMP[6].xxxx, IMM[3].wwww > 52: ABS TEMP[8].x, TEMP[8].xxxx > 53: LG2 TEMP[8].x, TEMP[8].xxxx > 54: MUL TEMP[8].x, TEMP[8].xxxx, IMM[5].xxxx > 55: EX2 TEMP[8].x, TEMP[8].xxxx > 56: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][16].zzzz > 57: MOV TEMP[2].y, TEMP[8].xxxx > 58: MIN TEMP[8].x, TEMP[8].xxxx, CONST[1][15].zzzz > 59: MOV TEMP[1].y, TEMP[8].xxxx > 60: USNE TEMP[8].x, TEMP[7].xxxx, IMM[1].xxxx > 61: UIF TEMP[8].xxxx :0 > 62: MOV TEMP[8].x, TEMP[2].xxxx > 63: ELSE :0 > 64: MOV TEMP[8].x, TEMP[1].xxxx > 65: ENDIF > 66: MOV TEMP[8].x, TEMP[8].xxxx > 67: USNE TEMP[10].x, TEMP[7].xxxx, IMM[1].xxxx > 68: UIF TEMP[10].xxxx :0 > 69: MOV TEMP[10].x, TEMP[2].yyyy > 70: ELSE :0 > 71: MOV TEMP[10].x, TEMP[1].yyyy > 72: ENDIF > 73: MOV TEMP[8].y, TEMP[10].xxxx > 74: ADD TEMP[10].x, -TEMP[1].xxxx, CONST[1][18].zzzz > 75: FSNE TEMP[11].x, CONST[1][15].yyyy, IMM[3].xxxx > 76: UIF TEMP[11].xxxx :0 > 77: RCP TEMP[11].x, CONST[1][15].yyyy > 78: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[11].xxxx > 79: ELSE :0 > 80: SSG TEMP[10].x, TEMP[10].xxxx > 81: MUL TEMP[11].x, IMM[3].yyyy, TEMP[10].xxxx > 82: ENDIF > 83: MAX TEMP[10].x, TEMP[8].xxxx, IMM[3].xxxx > 84: MAX TEMP[8].x, TEMP[8].yyyy, CONST[1][19].wwww > 85: FSNE TEMP[12].x, CONST[1][15].xxxx, IMM[3].xxxx > 86: UIF TEMP[12].xxxx :0 > 87: RCP TEMP[12].x, CONST[1][15].xxxx > 88: MUL TEMP[12].x, -TEMP[10].xxxx, TEMP[12].xxxx > 89: ELSE :0 > 90: SSG TEMP[13].x, -TEMP[10].xxxx > 91: MUL TEMP[12].x, IMM[3].yyyy, TEMP[13].xxxx > 92: ENDIF > 93: MOV TEMP[2].w, TEMP[12].xxxx > 94: FSNE TEMP[12].x, CONST[1][14].wwww, IMM[3].xxxx > 95: UIF TEMP[12].xxxx :0 > 96: RCP TEMP[12].x, CONST[1][14].wwww > 97: MUL TEMP[12].x, -TEMP[10].xxxx, TEMP[12].xxxx > 98: ELSE :0 > 99: SSG TEMP[10].x, -TEMP[10].xxxx >100: MUL TEMP[12].x, IMM[3].yyyy, TEMP[10].xxxx >101: ENDIF >102: MOV TEMP[2].x, TEMP[12].xxxx >103: MUL TEMP[10].xy, TEMP[2].xwww, IMM[5].yyyy >104: EX2 TEMP[12].x, TEMP[10].xxxx >105: EX2 TEMP[10].x, TEMP[10].yyyy >106: ADD TEMP[10].x, TEMP[10].xxxx, CONST[1][16].wwww >107: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][17].yyyy >108: MUL TEMP[10].x, TEMP[10].xxxx, IMM[0].zzzz >109: MUL TEMP[13].xyz, TEMP[4].yxzz, CONST[1][17].zwzz >110: MOV TEMP[13].xyz, TEMP[13].xyzz >111: MOV TEMP[13].w, IMM[3].xxxx >112: TXL TEMP[13].xyz, TEMP[13], SAMP[2], 3D >113: USNE TEMP[14].x, TEMP[7].xxxx, IMM[1].xxxx >114: UIF TEMP[14].xxxx :0 >115: MOV TEMP[3].x, TEMP[3].xxxx >116: ELSE :0 >117: MOV TEMP[3].x, TEMP[4].xxxx >118: ENDIF >119: AND TEMP[7].x, TEMP[7].xxxx, CONST[1][18].xxxx >120: ADD TEMP[3].x, TEMP[3].xxxx, IMM[5].zzzz >121: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].wwww >122: MOV_SAT TEMP[3].x, TEMP[3].xxxx >123: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >124: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[10].xxxx >125: FMA TEMP[10].x, -TEMP[10].xxxx, CONST[1][18].yyyy, 
IMM[0].wwww >126: MOV_SAT TEMP[10].x, TEMP[10].xxxx >127: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][15].wwww >128: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][17].xxxx >129: MUL TEMP[8].x, TEMP[3].xxxx, TEMP[8].xxxx >130: FMA TEMP[3].xyz, CONST[1][14].xyzz, TEMP[12].xxxx, TEMP[3].xxxx >131: MUL TEMP[12].xyz, TEMP[12].xxxx, CONST[1][14].xyzz >132: FMA TEMP[6].xyz, TEMP[12].xyzz, TEMP[6].xxxx, TEMP[8].xxxx >133: FSEQ TEMP[8].xyz, TEMP[3].xyzz, IMM[3].xxxx >134: SSG TEMP[12].xyz, TEMP[6].xyzz >135: MUL TEMP[12].xyz, IMM[3].yyyy, TEMP[12].xyzz >136: RCP TEMP[14].x, TEMP[3].xxxx >137: RCP TEMP[14].y, TEMP[3].yyyy >138: RCP TEMP[14].z, TEMP[3].zzzz >139: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[14].xyzz >140: UCMP TEMP[6].xyz, TEMP[8].xyzz, TEMP[12].xyzz, TEMP[6].xyzz >141: MUL TEMP[2].xyz, TEMP[11].xxxx, -TEMP[3].xyzz >142: MUL TEMP[3].xyz, TEMP[9].xxxx, -TEMP[3].xyzz >143: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[5].yyyy >144: EX2 TEMP[8].x, TEMP[3].xxxx >145: EX2 TEMP[8].y, TEMP[3].yyyy >146: EX2 TEMP[8].z, TEMP[3].zzzz >147: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[5].yyyy >148: EX2 TEMP[3].x, TEMP[2].xxxx >149: EX2 TEMP[3].y, TEMP[2].yyyy >150: EX2 TEMP[3].z, TEMP[2].zzzz >151: MUL TEMP[2].xyz, TEMP[3].xyzz, CONST[1][19].xyzz >152: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xyzz >153: ADD TEMP[1].xyz, -TEMP[8].xyzz, IMM[0].wwww >154: FMA TEMP[3].xyz, TEMP[7].xxxx, TEMP[10].xxxx, TEMP[8].xyzz >155: MOV_SAT TEMP[0].xyz, TEMP[3].xyzz >156: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[1].xyzz >157: MUL TEMP[4].xyz, TEMP[13].xyzz, TEMP[2].xyzz >158: FMA TEMP[2].xyz, TEMP[4].xyzz, CONST[2][68].xxxx, TEMP[2].xyzz >159: FMA TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xyzz, TEMP[2].xyzz >160: MOV TEMP[0].w, IMM[3].xxxx >161: MOV OUT[0], TEMP[0] >162: END >radeonsi: Compiling shader 52 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 236) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %40 = call float @llvm.SI.load.const(<16 x i8> 
%24, i32 268) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call float @llvm.SI.load.const(<16 x i8> %57, i32 176) > %59 = call float @llvm.SI.load.const(<16 x i8> %57, i32 180) > %60 = call float @llvm.SI.load.const(<16 x i8> %57, i32 184) > %61 = call float @llvm.SI.load.const(<16 x i8> %57, i32 1088) > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 3 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 > %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 7 > %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 > %76 = extractelement <8 x i32> %72, i32 7 > %77 = extractelement <4 x i32> %75, i32 0 > %78 = and i32 %77, %76 > %79 = insertelement <4 x i32> %75, i32 %78, i32 0 > %80 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 > %82 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %83 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %82, i64 0, i64 11 > %84 = load <4 x i32>, <4 x i32> addrspace(2)* %83, align 16, !tbaa !0 > %85 = extractelement <8 x i32> %81, i32 7 > %86 = extractelement <4 x i32> %84, i32 0 > %87 = and i32 %86, %85 > %88 = insertelement <4 x i32> %84, i32 %87, i32 0 > %89 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %95 = call float 
@llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %97 = fadd float %32, 0x41C2FCB000000000 > %98 = fadd float %59, %48 > %99 = fadd float %98, 0x41C2FCB000000000 > %100 = fmul float %58, %58 > %101 = fmul float %99, %99 > %102 = fadd float %101, %100 > %103 = fmul float %60, %60 > %104 = fadd float %102, %103 > %105 = fmul float %94, %94 > %106 = fmul float %95, %95 > %107 = fadd float %106, %105 > %108 = fmul float %96, %96 > %109 = fadd float %107, %108 > %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) > %111 = fmul float %110, %95 > %112 = fmul float %110, %94 > %113 = fmul float %110, %96 > %114 = fmul float %99, %111 > %115 = fsub float -0.000000e+00, %114 > %116 = fmul float %58, %112 > %117 = fsub float %115, %116 > %118 = fmul float %60, %113 > %119 = fsub float %117, %118 > %120 = fsub float -0.000000e+00, %119 > %121 = call float @llvm.fma.f32(float %120, float %119, float %104) > %122 = fsub float -0.000000e+00, %121 > %123 = call float @llvm.fma.f32(float %97, float %97, float %122) > %124 = call float @llvm.sqrt.f32(float %123) > %125 = fadd float %124, %119 > %126 = call float @llvm.maxnum.f32(float %111, float 0x3F847AE140000000) > %127 = fmul float %125, %126 > %128 = call float @llvm.fma.f32(float %127, float 5.000000e-01, float %98) > %129 = fcmp oeq float %91, 0.000000e+00 > %130 = fcmp oeq float %91, 0.000000e+00 > %131 = fcmp ogt float %89, 0.000000e+00 > %132 = select i1 %131, float 1.000000e+00, float %89 > %133 = fcmp oge float %132, 0.000000e+00 > %134 = fcmp ogt float %90, 0.000000e+00 > %135 = select i1 %134, float 1.000000e+00, float %90 > %136 = fcmp oge float %135, 0.000000e+00 > %.op = fmul float %132, 0x4600000000000000 > %137 = select i1 %133, float %.op, float 0xC600000000000000 > %.op80 = fmul float %135, 0x4600000000000000 > %138 = select i1 %136, float %.op80, float 0xC600000000000000 > %139 = fdiv float 1.000000e+00, %91 > %140 = fmul float %89, %139 > %141 = fmul float %90, %139 > %142 = select i1 %129, float %137, float %140 > %143 = select i1 %130, float %138, float %141 > %144 = bitcast float %142 to i32 > %145 = bitcast float %143 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 0 > %150 = bitcast float %142 to i32 > %151 = bitcast float %143 to i32 > %152 = insertelement <2 x i32> undef, i32 %150, i32 0 > %153 = insertelement <2 x i32> %152, i32 %151, i32 1 > %154 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %153, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %155 = extractelement <4 x float> %154, i32 0 > %156 = extractelement <4 x float> %154, i32 1 > %157 = extractelement <4 x float> %154, i32 2 > %158 = fcmp oeq float %91, 0.000000e+00 > %159 = fcmp oeq float %91, 0.000000e+00 > %160 = fcmp ogt float %92, 0.000000e+00 > %161 = select i1 %160, float 1.000000e+00, float %92 > %162 = fcmp oge float %161, 0.000000e+00 > %163 = fcmp ogt float %93, 0.000000e+00 > %164 = select i1 %163, float 1.000000e+00, float %93 > %165 = fcmp oge float %164, 0.000000e+00 > %.op81 = fmul float %161, 0x4600000000000000 > %166 = select i1 %162, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %164, 0x4600000000000000 > %167 = select i1 %165, 
float %.op82, float 0xC600000000000000 > %168 = fdiv float 1.000000e+00, %91 > %169 = fmul float %92, %168 > %170 = fmul float %93, %168 > %171 = select i1 %158, float %166, float %169 > %172 = select i1 %159, float %167, float %170 > %173 = fmul float %171, %149 > %174 = fmul float %172, %149 > %175 = fcmp oeq float %149, 1.000000e+07 > %176 = fmul float %27, %149 > %177 = fmul float %25, %173 > %178 = fadd float %176, %177 > %179 = fmul float %26, %174 > %180 = fadd float %178, %179 > %181 = fadd float %180, %28 > %. = select i1 %175, float %125, float %149 > %182 = fadd float %181, %48 > %183 = fmul float %112, %53 > %184 = fmul float %111, %54 > %185 = fadd float %184, %183 > %186 = fmul float %113, %55 > %187 = fadd float %185, %186 > %188 = fsub float -0.000000e+00, %38 > %189 = call float @llvm.fma.f32(float %188, float %187, float %37) > %190 = call float @llvm.fma.f32(float %187, float %187, float 1.000000e+00) > %191 = fmul float %190, 0x3FAE8EC8A0000000 > %192 = call float @llvm.fabs.f32(float %189) > %193 = call float @llvm.log2.f32(float %192) > %194 = fmul float %193, -1.500000e+00 > %195 = call float @llvm.exp2.f32(float %194) > %196 = fmul float %195, %39 > %197 = call float @llvm.minnum.f32(float %196, float %35) > %temp32.0 = select i1 %175, float %128, float %182 > %.78 = select i1 %175, float %196, float %197 > %198 = fsub float %47, %182 > %199 = fcmp une float %34, 0.000000e+00 > br i1 %199, label %IF67, label %ELSE68 > >IF67: ; preds = %main_body > %200 = fdiv float 1.000000e+00, %34 > %201 = fmul float %198, %200 > br label %ENDIF66 > >ELSE68: ; preds = %main_body > %202 = fcmp ogt float %198, 0.000000e+00 > %203 = select i1 %202, float 1.000000e+00, float %198 > %204 = fcmp oge float %203, 0.000000e+00 > %.op83 = fmul float %203, 0x4600000000000000 > %205 = select i1 %204, float %.op83, float 0xC600000000000000 > br label %ENDIF66 > >ENDIF66: ; preds = %ELSE68, %IF67 > %temp44.0 = phi float [ %201, %IF67 ], [ %205, %ELSE68 ] > %206 = call float @llvm.maxnum.f32(float %temp32.0, float 0.000000e+00) > %207 = call float @llvm.maxnum.f32(float %.78, float %52) > %208 = fcmp une float %33, 0.000000e+00 > br i1 %208, label %IF70, label %ELSE71 > >IF70: ; preds = %ENDIF66 > %209 = fdiv float 1.000000e+00, %33 > %210 = fmul float %206, %209 > %211 = fsub float -0.000000e+00, %210 > br label %ENDIF69 > >ELSE71: ; preds = %ENDIF66 > %212 = fcmp ole float %206, -0.000000e+00 > %.op84 = fmul float %206, 0xC600000000000000 > %213 = select i1 %212, float %.op84, float 0xC600000000000000 > br label %ENDIF69 > >ENDIF69: ; preds = %ELSE71, %IF70 > %temp48.0 = phi float [ %211, %IF70 ], [ %213, %ELSE71 ] > %214 = fcmp une float %32, 0.000000e+00 > br i1 %214, label %IF73, label %ELSE74 > >IF73: ; preds = %ENDIF69 > %215 = fdiv float 1.000000e+00, %32 > %216 = fmul float %206, %215 > %217 = fsub float -0.000000e+00, %216 > br label %ENDIF72 > >ELSE74: ; preds = %ENDIF69 > %218 = fcmp ole float %206, -0.000000e+00 > %.op85 = fmul float %206, 0xC600000000000000 > %219 = select i1 %218, float %.op85, float 0xC600000000000000 > br label %ENDIF72 > >ENDIF72: ; preds = %ELSE74, %IF73 > %temp48.1 = phi float [ %217, %IF73 ], [ %219, %ELSE74 ] > %220 = fmul float %temp48.1, 0x3FF7154760000000 > %221 = fmul float %temp48.0, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %220) > %223 = call float @llvm.exp2.f32(float %221) > %224 = fadd float %223, %40 > %225 = fmul float %224, %42 > %226 = fmul float %225, 5.000000e-01 > %227 = fmul float %112, %43 > %228 = fmul float %111, %44 > 
%229 = fmul float %113, %43 > %230 = bitcast float %227 to i32 > %231 = bitcast float %228 to i32 > %232 = bitcast float %229 to i32 > %233 = insertelement <4 x i32> undef, i32 %230, i32 0 > %234 = insertelement <4 x i32> %233, i32 %231, i32 1 > %235 = insertelement <4 x i32> %234, i32 %232, i32 2 > %236 = insertelement <4 x i32> %235, i32 0, i32 3 > %237 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %236, <8 x i32> %81, <4 x i32> %88, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %238 = extractelement <4 x float> %237, i32 0 > %239 = extractelement <4 x float> %237, i32 1 > %240 = extractelement <4 x float> %237, i32 2 > %.79 = select i1 %175, float %126, float %111 > %241 = select i1 %175, float %45, float 0.000000e+00 > %242 = fadd float %.79, 0xBFC3333340000000 > %243 = fsub float 1.000000e+00, %242 > %244 = call float @llvm.AMDGPU.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) > %245 = fmul float %244, %244 > %246 = fmul float %245, %226 > %247 = fsub float -0.000000e+00, %226 > %248 = call float @llvm.fma.f32(float %247, float %46, float 1.000000e+00) > %249 = call float @llvm.AMDGPU.clamp.(float %248, float 0.000000e+00, float 1.000000e+00) > %250 = call float @llvm.minnum.f32(float %246, float %36) > %251 = call float @llvm.maxnum.f32(float %250, float %41) > %252 = fmul float %251, %207 > %253 = call float @llvm.fma.f32(float %29, float %222, float %251) > %254 = call float @llvm.fma.f32(float %30, float %222, float %251) > %255 = call float @llvm.fma.f32(float %31, float %222, float %251) > %256 = fmul float %222, %29 > %257 = fmul float %222, %30 > %258 = fmul float %222, %31 > %259 = call float @llvm.fma.f32(float %256, float %191, float %252) > %260 = call float @llvm.fma.f32(float %257, float %191, float %252) > %261 = call float @llvm.fma.f32(float %258, float %191, float %252) > %262 = fcmp oeq float %253, 0.000000e+00 > %263 = fcmp oeq float %254, 0.000000e+00 > %264 = fcmp oeq float %255, 0.000000e+00 > %265 = fcmp ogt float %259, 0.000000e+00 > %266 = select i1 %265, float 1.000000e+00, float %259 > %267 = fcmp oge float %266, 0.000000e+00 > %268 = fcmp ogt float %260, 0.000000e+00 > %269 = select i1 %268, float 1.000000e+00, float %260 > %270 = fcmp oge float %269, 0.000000e+00 > %271 = fcmp ogt float %261, 0.000000e+00 > %272 = select i1 %271, float 1.000000e+00, float %261 > %273 = fcmp oge float %272, 0.000000e+00 > %.op86 = fmul float %266, 0x4600000000000000 > %274 = select i1 %267, float %.op86, float 0xC600000000000000 > %.op87 = fmul float %269, 0x4600000000000000 > %275 = select i1 %270, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %272, 0x4600000000000000 > %276 = select i1 %273, float %.op88, float 0xC600000000000000 > %277 = fdiv float 1.000000e+00, %253 > %278 = fdiv float 1.000000e+00, %254 > %279 = fdiv float 1.000000e+00, %255 > %280 = fmul float %259, %277 > %281 = fmul float %260, %278 > %282 = fmul float %261, %279 > %283 = select i1 %262, float %274, float %280 > %284 = select i1 %263, float %275, float %281 > %285 = select i1 %264, float %276, float %282 > %286 = fmul float %253, %temp44.0 > %287 = fmul float %254, %temp44.0 > %288 = fmul float %255, %temp44.0 > %289 = fmul float %253, %. > %290 = fmul float %254, %. > %291 = fmul float %255, %. 
> %292 = fmul float %289, 0xBFF7154760000000 > %293 = fmul float %290, 0xBFF7154760000000 > %294 = fmul float %291, 0xBFF7154760000000 > %295 = call float @llvm.exp2.f32(float %292) > %296 = call float @llvm.exp2.f32(float %293) > %297 = call float @llvm.exp2.f32(float %294) > %298 = fmul float %286, 0xBFF7154760000000 > %299 = fmul float %287, 0xBFF7154760000000 > %300 = fmul float %288, 0xBFF7154760000000 > %301 = call float @llvm.exp2.f32(float %298) > %302 = call float @llvm.exp2.f32(float %299) > %303 = call float @llvm.exp2.f32(float %300) > %304 = fmul float %301, %49 > %305 = fmul float %302, %50 > %306 = fmul float %303, %51 > %307 = fmul float %304, %283 > %308 = fmul float %305, %284 > %309 = fmul float %306, %285 > %310 = fsub float 1.000000e+00, %295 > %311 = fsub float 1.000000e+00, %296 > %312 = fsub float 1.000000e+00, %297 > %313 = call float @llvm.fma.f32(float %241, float %249, float %295) > %314 = call float @llvm.fma.f32(float %241, float %249, float %296) > %315 = call float @llvm.fma.f32(float %241, float %249, float %297) > %316 = call float @llvm.AMDGPU.clamp.(float %313, float 0.000000e+00, float 1.000000e+00) > %317 = call float @llvm.AMDGPU.clamp.(float %314, float 0.000000e+00, float 1.000000e+00) > %318 = call float @llvm.AMDGPU.clamp.(float %315, float 0.000000e+00, float 1.000000e+00) > %319 = fmul float %307, %310 > %320 = fmul float %308, %311 > %321 = fmul float %309, %312 > %322 = fmul float %238, %319 > %323 = fmul float %239, %320 > %324 = fmul float %240, %321 > %325 = call float @llvm.fma.f32(float %322, float %61, float %319) > %326 = call float @llvm.fma.f32(float %323, float %61, float %320) > %327 = call float @llvm.fma.f32(float %324, float %61, float %321) > %328 = call float @llvm.fma.f32(float %155, float %316, float %325) > %329 = call float @llvm.fma.f32(float %156, float %317, float %326) > %330 = call float @llvm.fma.f32(float %157, float %318, float %327) > %331 = bitcast float %5 to i32 > %332 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %331, 10 > %333 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %332, float %328, 11 > %334 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %333, float %329, 12 > %335 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %334, float %330, 13 > %336 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %335, float 0.000000e+00, 14 > %337 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %336, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %337 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; 
Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..6] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1], IN[0].xyxy, IMM[0].yyzw, IMM[0].xxzz > 3: FMA TEMP[2].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 4: ADD TEMP[3].xy, TEMP[2].xyyy, -CONST[1][6].zwww > 5: ADD TEMP[2].xy, TEMP[2].xyyy, CONST[1][6].zwww > 6: MOV TEMP[3].zw, TEMP[2].yyxy > 7: MOV OUT[2], TEMP[3] > 8: MOV OUT[1], TEMP[1] > 9: MOV OUT[0], TEMP[0] > 10: END >radeonsi: Compiling shader 53 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 104) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 108) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 1.000000e+00, float 0.000000e+00) > %24 = call float @llvm.fma.f32(float %22, float 1.000000e+00, float 0.000000e+00) > %25 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %26 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %27 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %28 = call float 
@llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %29 = fsub float %27, %16 > %30 = fsub float %28, %17 > %31 = fadd float %27, %16 > %32 = fadd float %28, %17 > %33 = bitcast i32 %11 to float > %34 = insertvalue <{ float, float, float }> undef, float %33, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %29, float %30, float %31, float %32) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %34 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..11] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, 10000.0000} >IMM[1] UINT32 {0, 176, 96, 0} >IMM[2] FLT32 { 1.0000, 4.0000, 0.1250, 0.2500} >IMM[3] FLT32 { 0.2990, 0.5870, 0.1140, 0.0039} >IMM[4] FLT32 { -2.0000, -0.5000, 0.5000, -0.1667} >IMM[5] FLT32 { -0.1667, 0.1667, 0.0000, 0.0000} >IMM[6] INT32 {1, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: FSNE TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 3: UIF TEMP[1].xxxx :0 > 4: RCP TEMP[1].x, TEMP[0].xxxx > 5: MUL TEMP[1].x, CONST[1][11].xxxx, TEMP[1].xxxx > 6: ELSE :0 > 7: SSG TEMP[2].x, CONST[1][11].xxxx > 8: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 9: ENDIF > 10: MOV TEMP[2].xy, IN[1].zwww > 11: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 12: FSNE TEMP[3].x, TEMP[2].xxxx, IMM[0].xxxx > 13: UIF TEMP[3].xxxx :0 > 14: RCP TEMP[2].x, TEMP[2].xxxx > 15: MUL TEMP[2].x, CONST[1][11].xxxx, TEMP[2].xxxx > 16: ELSE :0 > 17: SSG TEMP[3].x, CONST[1][11].xxxx > 18: MUL TEMP[2].x, IMM[0].yyyy, TEMP[3].xxxx > 19: ENDIF > 20: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[1].xxxx > 21: MOV TEMP[1].xy, IN[0].zwww > 22: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 23: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx > 24: UIF TEMP[2].xxxx :0 > 25: RCP TEMP[1].x, TEMP[1].xxxx > 26: MUL TEMP[1].x, CONST[1][11].xxxx, TEMP[1].xxxx > 27: ELSE :0 > 28: SSG TEMP[2].x, CONST[1][11].xxxx > 29: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 30: ENDIF > 31: FMA TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz, TEMP[0].xxxx > 32: MOV TEMP[2].xy, IN[1].zyyy > 33: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 34: FSNE TEMP[3].x, TEMP[2].xxxx, IMM[0].xxxx > 35: UIF TEMP[3].xxxx :0 > 36: RCP TEMP[2].x, TEMP[2].xxxx > 37: MUL TEMP[2].x, CONST[1][11].xxxx, TEMP[2].xxxx > 38: ELSE :0 > 39: SSG TEMP[3].x, CONST[1][11].xxxx > 40: MUL TEMP[2].x, IMM[0].yyyy, TEMP[3].xxxx > 41: ENDIF > 42: MOV TEMP[3].xy, IN[1].xwww > 43: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D > 44: FSNE TEMP[4].x, 
TEMP[3].xxxx, IMM[0].xxxx > 45: UIF TEMP[4].xxxx :0 > 46: RCP TEMP[3].x, TEMP[3].xxxx > 47: MUL TEMP[3].x, CONST[1][11].xxxx, TEMP[3].xxxx > 48: ELSE :0 > 49: SSG TEMP[4].x, CONST[1][11].xxxx > 50: MUL TEMP[3].x, IMM[0].yyyy, TEMP[4].xxxx > 51: ENDIF > 52: ADD TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx > 53: FMA TEMP[1].x, -TEMP[1].xxxx, IMM[0].zzzz, TEMP[2].xxxx > 54: ABS TEMP[1].x, TEMP[1].xxxx > 55: ABS TEMP[2].x, TEMP[0].xxxx > 56: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[2].xxxx > 57: FMA TEMP[0].x, -TEMP[0].xxxx, IMM[0].wwww, IMM[2].xxxx > 58: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 59: FMA TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy, IMM[2].zzzz > 60: MOV TEMP[2].xy, IN[1].xyyy > 61: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D > 62: DP3 TEMP[2].x, TEMP[2].xyzz, IMM[3].xyzz > 63: MOV TEMP[3].xy, IN[1].zyyy > 64: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D > 65: DP3 TEMP[3].x, TEMP[3].xyzz, IMM[3].xyzz > 66: MOV TEMP[2].y, TEMP[3].xxxx > 67: MOV TEMP[3].xy, IN[1].xwww > 68: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D > 69: DP3 TEMP[3].x, TEMP[3].xyzz, IMM[3].xyzz > 70: MOV TEMP[2].z, TEMP[3].xxxx > 71: MOV TEMP[3].xy, IN[1].zwww > 72: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D > 73: DP3 TEMP[3].x, TEMP[3].xyzz, IMM[3].xyzz > 74: MOV TEMP[2].w, TEMP[3].xxxx > 75: DP4 TEMP[3].x, TEMP[2], IMM[2].xxxx > 76: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx > 77: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].wwww > 78: MAX TEMP[1].x, TEMP[0].xxxx, IMM[3].wwww > 79: ADD TEMP[3], TEMP[2].ywzw, TEMP[2].xzxy > 80: ADD TEMP[4].x, -TEMP[3].yyyy, TEMP[3].xxxx > 81: ADD TEMP[5].xy, -TEMP[3].wwww, TEMP[3].zzzz > 82: MOV TEMP[3].yw, TEMP[5].yxyy > 83: ABS TEMP[6].x, TEMP[4].xxxx > 84: ABS TEMP[5].x, TEMP[5].yyyy > 85: MIN TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx > 86: MOV TEMP[3].xz, -TEMP[4].xxxx > 87: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[5].xxxx > 88: FSNE TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 89: UIF TEMP[1].xxxx :0 > 90: RCP TEMP[1].x, TEMP[0].xxxx > 91: ELSE :0 > 92: MOV TEMP[1].x, IMM[0].yyyy > 93: ENDIF > 94: MUL TEMP[0], TEMP[1].xxxx, TEMP[3] > 95: MAX TEMP[1], TEMP[0], IMM[4].xxxx > 96: MIN TEMP[1], TEMP[1], IMM[0].zzzz > 97: MUL TEMP[0], TEMP[1], CONST[1][6].zwzw > 98: FMA TEMP[1], TEMP[0], IMM[4].yyzz, IN[0].zwzw > 99: FMA TEMP[4], TEMP[0].zwzw, IMM[5].xxyy, IN[0].zwzw >100: MOV TEMP[5].xy, TEMP[1].zwww >101: TEX TEMP[5], TEMP[5], SAMP[1], 2D >102: MOV TEMP[1].xy, TEMP[1].xyyy >103: TEX TEMP[1], TEMP[1], SAMP[1], 2D >104: ADD TEMP[3], TEMP[5], TEMP[1] >105: MUL TEMP[3], TEMP[3], IMM[2].wwww >106: MOV TEMP[1].xy, TEMP[4].xyyy >107: TEX TEMP[1], TEMP[1], SAMP[1], 2D >108: MOV TEMP[4].xy, TEMP[4].zwww >109: TEX TEMP[4], TEMP[4], SAMP[1], 2D >110: ADD TEMP[0], TEMP[4], TEMP[1] >111: FMA TEMP[1], TEMP[0], IMM[2].wwww, TEMP[3] >112: MUL TEMP[0], TEMP[0], IMM[4].zzzz >113: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[3].xyzz >114: MIN TEMP[4].xy, TEMP[2].ywww, TEMP[2].xzzz >115: MAX TEMP[2].xy, TEMP[2].ywww, TEMP[2].xzzz >116: MAX TEMP[2].x, TEMP[2].yyyy, TEMP[2].xxxx >117: MIN TEMP[4].x, TEMP[4].yyyy, TEMP[4].xxxx >118: MOV TEMP[5].xy, IN[0].zwww >119: TEX TEMP[5].xyz, TEMP[5], SAMP[1], 2D >120: DP3 TEMP[5].x, TEMP[5].xyzz, IMM[3].xyzz >121: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx >122: MAX TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx >123: FSLT TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx >124: AND TEMP[2].x, TEMP[2].xxxx, IMM[6].xxxx >125: INEG TEMP[2].x, TEMP[2].xxxx >126: FSLT TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx >127: AND TEMP[3].x, TEMP[3].xxxx, IMM[6].xxxx >128: INEG TEMP[3].x, TEMP[3].xxxx >129: OR TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx >130: USNE TEMP[3].x, 
TEMP[2].xxxx, IMM[1].xxxx >131: UIF TEMP[3].xxxx :0 >132: MOV TEMP[3].x, TEMP[0].xxxx >133: ELSE :0 >134: MOV TEMP[3].x, TEMP[1].xxxx >135: ENDIF >136: MOV TEMP[3].x, TEMP[3].xxxx >137: USNE TEMP[4].x, TEMP[2].xxxx, IMM[1].xxxx >138: UIF TEMP[4].xxxx :0 >139: MOV TEMP[4].x, TEMP[0].yyyy >140: ELSE :0 >141: MOV TEMP[4].x, TEMP[1].yyyy >142: ENDIF >143: MOV TEMP[3].y, TEMP[4].xxxx >144: USNE TEMP[4].x, TEMP[2].xxxx, IMM[1].xxxx >145: UIF TEMP[4].xxxx :0 >146: MOV TEMP[4].x, TEMP[0].zzzz >147: ELSE :0 >148: MOV TEMP[4].x, TEMP[1].zzzz >149: ENDIF >150: MOV TEMP[3].z, TEMP[4].xxxx >151: USNE TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx >152: UIF TEMP[2].xxxx :0 >153: MOV TEMP[0].x, TEMP[0].wwww >154: ELSE :0 >155: MOV TEMP[0].x, TEMP[1].wwww >156: ENDIF >157: MOV TEMP[3].w, TEMP[0].xxxx >158: MOV OUT[0], TEMP[3] >159: END >radeonsi: Compiling shader 54 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %52 = bitcast float %48 to i32 > %53 = bitcast float %49 to i32 > %54 = insertelement <2 x i32> undef, i32 %52, i32 0 > %55 
= insertelement <2 x i32> %54, i32 %53, i32 1 > %56 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %55, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %57 = extractelement <4 x float> %56, i32 0 > %58 = fcmp une float %57, 0.000000e+00 > br i1 %58, label %IF, label %ELSE > >IF: ; preds = %main_body > %59 = fdiv float 1.000000e+00, %57 > %60 = fmul float %27, %59 > br label %ENDIF > >ELSE: ; preds = %main_body > %61 = fcmp ogt float %27, 0.000000e+00 > %62 = select i1 %61, float 1.000000e+00, float %27 > %63 = fcmp oge float %62, 0.000000e+00 > %.op = fmul float %62, 0x4600000000000000 > %64 = select i1 %63, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %60, %IF ], [ %64, %ELSE ] > %65 = bitcast float %50 to i32 > %66 = bitcast float %51 to i32 > %67 = insertelement <2 x i32> undef, i32 %65, i32 0 > %68 = insertelement <2 x i32> %67, i32 %66, i32 1 > %69 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %68, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %70 = extractelement <4 x float> %69, i32 0 > %71 = fcmp une float %70, 0.000000e+00 > br i1 %71, label %IF29, label %ELSE30 > >IF29: ; preds = %ENDIF > %72 = fdiv float 1.000000e+00, %70 > %73 = fmul float %27, %72 > br label %ENDIF28 > >ELSE30: ; preds = %ENDIF > %74 = fcmp ogt float %27, 0.000000e+00 > %75 = select i1 %74, float 1.000000e+00, float %27 > %76 = fcmp oge float %75, 0.000000e+00 > %.op56 = fmul float %75, 0x4600000000000000 > %77 = select i1 %76, float %.op56, float 0xC600000000000000 > br label %ENDIF28 > >ENDIF28: ; preds = %ELSE30, %IF29 > %temp8.0 = phi float [ %73, %IF29 ], [ %77, %ELSE30 ] > %78 = fadd float %temp8.0, %temp4.0 > %79 = bitcast float %46 to i32 > %80 = bitcast float %47 to i32 > %81 = insertelement <2 x i32> undef, i32 %79, i32 0 > %82 = insertelement <2 x i32> %81, i32 %80, i32 1 > %83 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %82, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %84 = extractelement <4 x float> %83, i32 0 > %85 = fcmp une float %84, 0.000000e+00 > br i1 %85, label %IF32, label %ELSE33 > >IF32: ; preds = %ENDIF28 > %86 = fdiv float 1.000000e+00, %84 > %87 = fmul float %27, %86 > br label %ENDIF31 > >ELSE33: ; preds = %ENDIF28 > %88 = fcmp ogt float %27, 0.000000e+00 > %89 = select i1 %88, float 1.000000e+00, float %27 > %90 = fcmp oge float %89, 0.000000e+00 > %.op57 = fmul float %89, 0x4600000000000000 > %91 = select i1 %90, float %.op57, float 0xC600000000000000 > br label %ENDIF31 > >ENDIF31: ; preds = %ELSE33, %IF32 > %temp4.1 = phi float [ %87, %IF32 ], [ %91, %ELSE33 ] > %92 = fsub float -0.000000e+00, %temp4.1 > %93 = call float @llvm.fma.f32(float %92, float 2.000000e+00, float %78) > %94 = bitcast float %50 to i32 > %95 = bitcast float %49 to i32 > %96 = insertelement <2 x i32> undef, i32 %94, i32 0 > %97 = insertelement <2 x i32> %96, i32 %95, i32 1 > %98 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %97, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %99 = extractelement <4 x float> %98, i32 0 > %100 = fcmp une float %99, 0.000000e+00 > br i1 %100, label %IF35, label %ELSE36 > >IF35: ; preds = %ENDIF31 > %101 = fdiv float 1.000000e+00, %99 > %102 = fmul float %27, %101 > br label %ENDIF34 > >ELSE36: ; preds = %ENDIF31 > %103 = fcmp ogt float %27, 0.000000e+00 > %104 = select i1 %103, float 1.000000e+00, 
float %27 > %105 = fcmp oge float %104, 0.000000e+00 > %.op58 = fmul float %104, 0x4600000000000000 > %106 = select i1 %105, float %.op58, float 0xC600000000000000 > br label %ENDIF34 > >ENDIF34: ; preds = %ELSE36, %IF35 > %temp8.1 = phi float [ %102, %IF35 ], [ %106, %ELSE36 ] > %107 = bitcast float %48 to i32 > %108 = bitcast float %51 to i32 > %109 = insertelement <2 x i32> undef, i32 %107, i32 0 > %110 = insertelement <2 x i32> %109, i32 %108, i32 1 > %111 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %110, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %112 = extractelement <4 x float> %111, i32 0 > %113 = fcmp une float %112, 0.000000e+00 > br i1 %113, label %IF38, label %ELSE39 > >IF38: ; preds = %ENDIF34 > %114 = fdiv float 1.000000e+00, %112 > %115 = fmul float %27, %114 > br label %ENDIF37 > >ELSE39: ; preds = %ENDIF34 > %116 = fcmp ogt float %27, 0.000000e+00 > %117 = select i1 %116, float 1.000000e+00, float %27 > %118 = fcmp oge float %117, 0.000000e+00 > %.op59 = fmul float %117, 0x4600000000000000 > %119 = select i1 %118, float %.op59, float 0xC600000000000000 > br label %ENDIF37 > >ENDIF37: ; preds = %ELSE39, %IF38 > %temp12.0 = phi float [ %115, %IF38 ], [ %119, %ELSE39 ] > %120 = fadd float %temp12.0, %temp8.1 > %121 = fsub float -0.000000e+00, %temp4.1 > %122 = call float @llvm.fma.f32(float %121, float 2.000000e+00, float %120) > %123 = call float @llvm.fabs.f32(float %122) > %124 = call float @llvm.fabs.f32(float %93) > %125 = fadd float %123, %124 > %126 = fsub float -0.000000e+00, %125 > %127 = call float @llvm.fma.f32(float %126, float 1.000000e+04, float 1.000000e+00) > %128 = call float @llvm.maxnum.f32(float %127, float 0.000000e+00) > %129 = call float @llvm.fma.f32(float %128, float 4.000000e+00, float 1.250000e-01) > %130 = bitcast float %48 to i32 > %131 = bitcast float %49 to i32 > %132 = insertelement <2 x i32> undef, i32 %130, i32 0 > %133 = insertelement <2 x i32> %132, i32 %131, i32 1 > %134 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %133, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %135 = extractelement <4 x float> %134, i32 0 > %136 = extractelement <4 x float> %134, i32 1 > %137 = extractelement <4 x float> %134, i32 2 > %138 = fmul float %135, 0x3FD322D0E0000000 > %139 = fmul float %136, 0x3FE2C8B440000000 > %140 = fadd float %139, %138 > %141 = fmul float %137, 0x3FBD2F1AA0000000 > %142 = fadd float %140, %141 > %143 = bitcast float %50 to i32 > %144 = bitcast float %49 to i32 > %145 = insertelement <2 x i32> undef, i32 %143, i32 0 > %146 = insertelement <2 x i32> %145, i32 %144, i32 1 > %147 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %146, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %148 = extractelement <4 x float> %147, i32 0 > %149 = extractelement <4 x float> %147, i32 1 > %150 = extractelement <4 x float> %147, i32 2 > %151 = fmul float %148, 0x3FD322D0E0000000 > %152 = fmul float %149, 0x3FE2C8B440000000 > %153 = fadd float %152, %151 > %154 = fmul float %150, 0x3FBD2F1AA0000000 > %155 = fadd float %153, %154 > %156 = bitcast float %48 to i32 > %157 = bitcast float %51 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> 
%160, i32 0 > %162 = extractelement <4 x float> %160, i32 1 > %163 = extractelement <4 x float> %160, i32 2 > %164 = fmul float %161, 0x3FD322D0E0000000 > %165 = fmul float %162, 0x3FE2C8B440000000 > %166 = fadd float %165, %164 > %167 = fmul float %163, 0x3FBD2F1AA0000000 > %168 = fadd float %166, %167 > %169 = bitcast float %50 to i32 > %170 = bitcast float %51 to i32 > %171 = insertelement <2 x i32> undef, i32 %169, i32 0 > %172 = insertelement <2 x i32> %171, i32 %170, i32 1 > %173 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %172, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %174 = extractelement <4 x float> %173, i32 0 > %175 = extractelement <4 x float> %173, i32 1 > %176 = extractelement <4 x float> %173, i32 2 > %177 = fmul float %174, 0x3FD322D0E0000000 > %178 = fmul float %175, 0x3FE2C8B440000000 > %179 = fadd float %178, %177 > %180 = fmul float %176, 0x3FBD2F1AA0000000 > %181 = fadd float %179, %180 > %182 = fadd float %142, %155 > %183 = fadd float %182, %168 > %184 = fadd float %183, %181 > %185 = fmul float %184, %129 > %186 = fmul float %185, 2.500000e-01 > %187 = call float @llvm.maxnum.f32(float %186, float 3.906250e-03) > %188 = fadd float %155, %142 > %189 = fadd float %181, %168 > %190 = fadd float %168, %142 > %191 = fadd float %181, %155 > %192 = fsub float %188, %189 > %193 = fsub float %190, %191 > %194 = fsub float %190, %191 > %195 = call float @llvm.fabs.f32(float %192) > %196 = call float @llvm.fabs.f32(float %194) > %197 = call float @llvm.minnum.f32(float %195, float %196) > %198 = fadd float %187, %197 > %199 = fcmp une float %198, 0.000000e+00 > %200 = fdiv float 1.000000e+00, %198 > %temp4.2 = select i1 %199, float %200, float 0x4600000000000000 > %201 = fmul float %192, %temp4.2 > %202 = fsub float -0.000000e+00, %201 > %203 = fmul float %temp4.2, %193 > %204 = fmul float %192, %temp4.2 > %205 = fsub float -0.000000e+00, %204 > %206 = fmul float %temp4.2, %194 > %207 = call float @llvm.maxnum.f32(float %202, float -2.000000e+00) > %208 = call float @llvm.maxnum.f32(float %203, float -2.000000e+00) > %209 = call float @llvm.maxnum.f32(float %205, float -2.000000e+00) > %210 = call float @llvm.maxnum.f32(float %206, float -2.000000e+00) > %211 = call float @llvm.minnum.f32(float %207, float 2.000000e+00) > %212 = call float @llvm.minnum.f32(float %208, float 2.000000e+00) > %213 = call float @llvm.minnum.f32(float %209, float 2.000000e+00) > %214 = call float @llvm.minnum.f32(float %210, float 2.000000e+00) > %215 = fmul float %211, %25 > %216 = fmul float %212, %26 > %217 = fmul float %213, %25 > %218 = fmul float %214, %26 > %219 = call float @llvm.fma.f32(float %215, float -5.000000e-01, float %46) > %220 = call float @llvm.fma.f32(float %216, float -5.000000e-01, float %47) > %221 = call float @llvm.fma.f32(float %217, float 5.000000e-01, float %46) > %222 = call float @llvm.fma.f32(float %218, float 5.000000e-01, float %47) > %223 = call float @llvm.fma.f32(float %217, float 0xBFC5555560000000, float %46) > %224 = call float @llvm.fma.f32(float %218, float 0xBFC5555560000000, float %47) > %225 = call float @llvm.fma.f32(float %217, float 0x3FC5555560000000, float %46) > %226 = call float @llvm.fma.f32(float %218, float 0x3FC5555560000000, float %47) > %227 = bitcast float %221 to i32 > %228 = bitcast float %222 to i32 > %229 = insertelement <2 x i32> undef, i32 %227, i32 0 > %230 = insertelement <2 x i32> %229, i32 %228, i32 1 > %231 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %230, 
<8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %232 = extractelement <4 x float> %231, i32 0 > %233 = extractelement <4 x float> %231, i32 1 > %234 = extractelement <4 x float> %231, i32 2 > %235 = extractelement <4 x float> %231, i32 3 > %236 = bitcast float %219 to i32 > %237 = bitcast float %220 to i32 > %238 = insertelement <2 x i32> undef, i32 %236, i32 0 > %239 = insertelement <2 x i32> %238, i32 %237, i32 1 > %240 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %239, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %241 = extractelement <4 x float> %240, i32 0 > %242 = extractelement <4 x float> %240, i32 1 > %243 = extractelement <4 x float> %240, i32 2 > %244 = extractelement <4 x float> %240, i32 3 > %245 = fadd float %232, %241 > %246 = fadd float %233, %242 > %247 = fadd float %234, %243 > %248 = fadd float %235, %244 > %249 = fmul float %245, 2.500000e-01 > %250 = fmul float %246, 2.500000e-01 > %251 = fmul float %247, 2.500000e-01 > %252 = fmul float %248, 2.500000e-01 > %253 = bitcast float %223 to i32 > %254 = bitcast float %224 to i32 > %255 = insertelement <2 x i32> undef, i32 %253, i32 0 > %256 = insertelement <2 x i32> %255, i32 %254, i32 1 > %257 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %256, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %258 = extractelement <4 x float> %257, i32 0 > %259 = extractelement <4 x float> %257, i32 1 > %260 = extractelement <4 x float> %257, i32 2 > %261 = extractelement <4 x float> %257, i32 3 > %262 = bitcast float %225 to i32 > %263 = bitcast float %226 to i32 > %264 = insertelement <2 x i32> undef, i32 %262, i32 0 > %265 = insertelement <2 x i32> %264, i32 %263, i32 1 > %266 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %265, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %267 = extractelement <4 x float> %266, i32 0 > %268 = extractelement <4 x float> %266, i32 1 > %269 = extractelement <4 x float> %266, i32 2 > %270 = extractelement <4 x float> %266, i32 3 > %271 = fadd float %267, %258 > %272 = fadd float %268, %259 > %273 = fadd float %269, %260 > %274 = fadd float %270, %261 > %275 = call float @llvm.fma.f32(float %271, float 2.500000e-01, float %249) > %276 = call float @llvm.fma.f32(float %272, float 2.500000e-01, float %250) > %277 = call float @llvm.fma.f32(float %273, float 2.500000e-01, float %251) > %278 = call float @llvm.fma.f32(float %274, float 2.500000e-01, float %252) > %279 = fmul float %271, 5.000000e-01 > %280 = fmul float %272, 5.000000e-01 > %281 = fmul float %273, 5.000000e-01 > %282 = fmul float %274, 5.000000e-01 > %283 = fmul float %275, 0x3FD322D0E0000000 > %284 = fmul float %276, 0x3FE2C8B440000000 > %285 = fadd float %284, %283 > %286 = fmul float %277, 0x3FBD2F1AA0000000 > %287 = fadd float %285, %286 > %288 = call float @llvm.minnum.f32(float %155, float %142) > %289 = call float @llvm.minnum.f32(float %181, float %168) > %290 = call float @llvm.maxnum.f32(float %155, float %142) > %291 = call float @llvm.maxnum.f32(float %181, float %168) > %292 = call float @llvm.maxnum.f32(float %291, float %290) > %293 = call float @llvm.minnum.f32(float %289, float %288) > %294 = bitcast float %46 to i32 > %295 = bitcast float %47 to i32 > %296 = insertelement <2 x i32> undef, i32 %294, i32 0 > %297 = insertelement <2 x i32> %296, i32 %295, i32 1 > %298 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %297, 
<8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %299 = extractelement <4 x float> %298, i32 0 > %300 = extractelement <4 x float> %298, i32 1 > %301 = extractelement <4 x float> %298, i32 2 > %302 = fmul float %299, 0x3FD322D0E0000000 > %303 = fmul float %300, 0x3FE2C8B440000000 > %304 = fadd float %303, %302 > %305 = fmul float %301, 0x3FBD2F1AA0000000 > %306 = fadd float %304, %305 > %307 = call float @llvm.minnum.f32(float %293, float %306) > %308 = call float @llvm.maxnum.f32(float %292, float %306) > %309 = fcmp olt float %308, %287 > %310 = fcmp olt float %287, %307 > %311 = or i1 %309, %310 > %. = select i1 %311, float %279, float %275 > %temp16.0 = select i1 %311, float %280, float %276 > %.55 = select i1 %311, float %281, float %277 > %temp.0 = select i1 %311, float %282, float %278 > %312 = bitcast float %5 to i32 > %313 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %312, 10 > %314 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %313, float %., 11 > %315 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %314, float %temp16.0, 12 > %316 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %315, float %.55, 13 > %317 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %316, float %temp.0, 14 > %318 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %317, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %318 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL 
SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..11] >DCL TEMP[0..22], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 16.0000} >IMM[1] UINT32 {0, 176, 112, 128} >IMM[2] UINT32 {160, 80, 96, 0} >IMM[3] FLT32 { -0.1500, 0.1500, 0.5000, 0.2000} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 { 2.0000, 10000.0000, 4.0000, 0.1250} >IMM[6] FLT32 { 0.2990, 0.5870, 0.1140, 0.2500} >IMM[7] FLT32 { 0.0039, -2.0000, -0.1667, 0.1667} >IMM[8] FLT32 { -0.5000, 0.5000, 0.0909, 0.0000} > 0: MOV TEMP[0].xy, IN[0].zwww > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: FSNE TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 3: UIF TEMP[1].xxxx :0 > 4: RCP TEMP[1].x, TEMP[0].xxxx > 5: MUL TEMP[1].x, CONST[1][11].xxxx, TEMP[1].xxxx > 6: ELSE :0 > 7: SSG TEMP[2].x, CONST[1][11].xxxx > 8: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 9: ENDIF > 10: MOV TEMP[0].z, TEMP[1].xxxx > 11: MOV TEMP[0].xy, IN[0].xyxx > 12: MOV TEMP[0].w, IMM[0].zzzz > 13: DP4 TEMP[2].x, CONST[1][7], TEMP[0] > 14: DP4 TEMP[3].x, CONST[1][8], TEMP[0] > 15: MOV TEMP[2].y, TEMP[3].xxxx > 16: DP4 TEMP[3].x, CONST[1][10], TEMP[0] > 17: FSEQ TEMP[4].xy, TEMP[3].xxxx, IMM[0].xxxx > 18: SSG TEMP[5].xy, TEMP[2].xyyy > 19: MUL TEMP[5].xy, IMM[0].yyyy, TEMP[5].xyyy > 20: RCP TEMP[3].xy, TEMP[3].xxxx > 21: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xyyy > 22: UCMP TEMP[3].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[3].xyyy > 23: ADD TEMP[0].xy, -TEMP[3].xyyy, IN[0].xyyy > 24: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[3].xyyy > 25: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1][5].zzzz > 26: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1][6].xyyy > 27: DP2 TEMP[3].x, TEMP[0].xyyy, TEMP[0].xyyy > 28: SQRT TEMP[4].x, TEMP[3].xxxx > 29: RSQ TEMP[3].x, TEMP[3].xxxx > 30: MUL TEMP[0].xy, TEMP[3].xxxx, TEMP[0].xyyy > 31: MIN TEMP[3].x, TEMP[4].xxxx, IMM[0].wwww > 32: MUL TEMP[0].xy, TEMP[3].xxxx, TEMP[0].xyyy > 33: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[1][6].zwww > 34: MOV TEMP[5].xy, IN[0].zwww > 35: TEX TEMP[5].w, TEMP[5], SAMP[1], 2D > 36: FSLT TEMP[5].x, TEMP[5].wwww, IMM[0].zzzz > 37: AND TEMP[5].x, TEMP[5].xxxx, IMM[4].xxxx > 38: INEG TEMP[5].x, TEMP[5].xxxx > 39: FSLT TEMP[4].x, IMM[3].zzzz, TEMP[4].xxxx > 40: AND TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx > 41: INEG TEMP[4].x, TEMP[4].xxxx > 42: AND TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: FMA TEMP[0].xy, -TEMP[0].xyyy, CONST[1][6].zwww, IN[0].zwww > 45: MOV TEMP[5].xy, TEMP[0].xyyy > 46: TEX TEMP[5], TEMP[5], SAMP[2], 2D > 47: MOV TEMP[6], TEMP[5] > 48: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 49: MOV TEMP[7].xy, TEMP[0].xyyy > 50: TEX TEMP[7], TEMP[7], SAMP[2], 2D > 51: MOV TEMP[8], TEMP[7] > 52: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 53: MOV TEMP[9].xy, TEMP[0].xyyy > 54: TEX TEMP[9], TEMP[9], SAMP[2], 2D > 55: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 56: MOV TEMP[10].xy, TEMP[0].xyyy > 57: TEX TEMP[10], TEMP[10], SAMP[2], 2D > 58: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 59: MOV TEMP[11].xy, TEMP[0].xyyy > 60: TEX TEMP[11], TEMP[11], SAMP[2], 2D > 61: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 62: MOV TEMP[12].xy, TEMP[0].xyyy > 63: TEX TEMP[12], TEMP[12], SAMP[2], 2D > 64: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 65: MOV TEMP[13].xy, TEMP[0].xyyy > 66: TEX TEMP[13], TEMP[13], SAMP[2], 2D > 67: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 68: MOV TEMP[14].xy, TEMP[0].xyyy > 69: TEX TEMP[14], TEMP[14], SAMP[2], 2D > 70: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 
71: MOV TEMP[15].xy, TEMP[0].xyyy > 72: TEX TEMP[15], TEMP[15], SAMP[2], 2D > 73: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 74: MOV TEMP[16].xy, TEMP[0].xyyy > 75: TEX TEMP[16], TEMP[16], SAMP[2], 2D > 76: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].wwww, TEMP[0].xyyy > 77: MOV TEMP[3].xy, TEMP[0].xyyy > 78: TEX TEMP[3], TEMP[3], SAMP[2], 2D > 79: MOV TEMP[2], TEMP[3] > 80: MOV TEMP[17].xy, IN[1].xyyy > 81: TEX TEMP[17].x, TEMP[17], SAMP[0], 2D > 82: FSNE TEMP[18].x, TEMP[17].xxxx, IMM[0].xxxx > 83: UIF TEMP[18].xxxx :0 > 84: RCP TEMP[17].x, TEMP[17].xxxx > 85: MUL TEMP[17].x, CONST[1][11].xxxx, TEMP[17].xxxx > 86: ELSE :0 > 87: SSG TEMP[18].x, CONST[1][11].xxxx > 88: MUL TEMP[17].x, IMM[0].yyyy, TEMP[18].xxxx > 89: ENDIF > 90: MOV TEMP[0].x, TEMP[17].xxxx > 91: MOV TEMP[17].xy, IN[1].zyyy > 92: TEX TEMP[17].x, TEMP[17], SAMP[0], 2D > 93: FSNE TEMP[18].x, TEMP[17].xxxx, IMM[0].xxxx > 94: UIF TEMP[18].xxxx :0 > 95: RCP TEMP[17].x, TEMP[17].xxxx > 96: MUL TEMP[17].x, CONST[1][11].xxxx, TEMP[17].xxxx > 97: ELSE :0 > 98: SSG TEMP[18].x, CONST[1][11].xxxx > 99: MUL TEMP[17].x, IMM[0].yyyy, TEMP[18].xxxx >100: ENDIF >101: MOV TEMP[0].y, TEMP[17].xxxx >102: MOV TEMP[17].xy, IN[1].xwww >103: TEX TEMP[17].x, TEMP[17], SAMP[0], 2D >104: FSNE TEMP[18].x, TEMP[17].xxxx, IMM[0].xxxx >105: UIF TEMP[18].xxxx :0 >106: RCP TEMP[18].x, TEMP[17].xxxx >107: MUL TEMP[18].x, CONST[1][11].xxxx, TEMP[18].xxxx >108: ELSE :0 >109: SSG TEMP[19].x, CONST[1][11].xxxx >110: MUL TEMP[18].x, IMM[0].yyyy, TEMP[19].xxxx >111: ENDIF >112: MOV TEMP[17].x, TEMP[18].xxxx >113: MOV TEMP[18].xy, IN[1].zwww >114: TEX TEMP[18].x, TEMP[18], SAMP[0], 2D >115: FSNE TEMP[19].x, TEMP[18].xxxx, IMM[0].xxxx >116: UIF TEMP[19].xxxx :0 >117: RCP TEMP[18].x, TEMP[18].xxxx >118: MUL TEMP[18].x, CONST[1][11].xxxx, TEMP[18].xxxx >119: ELSE :0 >120: SSG TEMP[19].x, CONST[1][11].xxxx >121: MUL TEMP[18].x, IMM[0].yyyy, TEMP[19].xxxx >122: ENDIF >123: MOV TEMP[17].y, TEMP[18].xxxx >124: ADD TEMP[0].xy, TEMP[0].xyyy, TEMP[17].yxxx >125: FMA TEMP[0].x, -TEMP[1].xxxx, IMM[5].xxxx, TEMP[0].xxxx >126: FMA TEMP[1].x, -TEMP[1].xxxx, IMM[5].xxxx, TEMP[0].yyyy >127: ABS TEMP[1].x, TEMP[1].xxxx >128: ABS TEMP[18].x, TEMP[0].xxxx >129: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[18].xxxx >130: FMA TEMP[0].x, -TEMP[0].xxxx, IMM[5].yyyy, IMM[0].zzzz >131: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx >132: FMA TEMP[1].x, TEMP[0].xxxx, IMM[5].zzzz, IMM[5].wwww >133: MOV TEMP[18].xy, IN[1].xyyy >134: TEX TEMP[18].xyz, TEMP[18], SAMP[2], 2D >135: MOV TEMP[19].xy, IN[1].zyyy >136: TEX TEMP[19].xyz, TEMP[19], SAMP[2], 2D >137: MOV TEMP[20].xy, IN[1].xwww >138: TEX TEMP[20].xyz, TEMP[20], SAMP[2], 2D >139: MOV TEMP[21].xy, IN[1].zwww >140: TEX TEMP[21].xyz, TEMP[21], SAMP[2], 2D >141: DP3 TEMP[17].x, TEMP[18].xyzz, IMM[6].xyzz >142: DP3 TEMP[22].x, TEMP[19].xyzz, IMM[6].xyzz >143: MOV TEMP[17].y, TEMP[22].xxxx >144: DP3 TEMP[20].x, TEMP[20].xyzz, IMM[6].xyzz >145: MOV TEMP[17].z, TEMP[20].xxxx >146: DP3 TEMP[20].x, TEMP[21].xyzz, IMM[6].xyzz >147: MOV TEMP[17].w, TEMP[20].xxxx >148: ADD TEMP[18], TEMP[17].ywzw, TEMP[17].xzxy >149: ADD TEMP[20].x, -TEMP[18].yyyy, TEMP[18].xxxx >150: MOV TEMP[19].xz, -TEMP[20].xxxx >151: ADD TEMP[21].xy, -TEMP[18].wwww, TEMP[18].zzzz >152: MOV TEMP[19].yw, TEMP[21].yxyy >153: DP4 TEMP[22].x, TEMP[17], IMM[0].zzzz >154: MOV TEMP[0].z, TEMP[22].xxxx >155: MUL TEMP[0].x, TEMP[22].xxxx, TEMP[1].xxxx >156: MUL TEMP[0].x, TEMP[0].xxxx, IMM[6].wwww >157: MAX TEMP[1].x, TEMP[0].xxxx, IMM[7].xxxx >158: ABS TEMP[20].x, TEMP[20].xxxx >159: ABS 
TEMP[21].x, TEMP[21].yyyy >160: MIN TEMP[20].x, TEMP[20].xxxx, TEMP[21].xxxx >161: MOV TEMP[0].y, TEMP[20].xxxx >162: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[20].xxxx >163: FSNE TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx >164: UIF TEMP[1].xxxx :0 >165: RCP TEMP[1].x, TEMP[0].xxxx >166: ELSE :0 >167: MOV TEMP[1].x, IMM[0].yyyy >168: ENDIF >169: MOV TEMP[0].x, TEMP[1].xxxx >170: MUL TEMP[18], TEMP[1].xxxx, TEMP[19] >171: MAX TEMP[1], TEMP[18], IMM[7].yyyy >172: MIN TEMP[1], TEMP[1], IMM[5].xxxx >173: MUL TEMP[18], TEMP[1], CONST[1][6].zwzw >174: FMA TEMP[1], TEMP[18].zwzw, IMM[7].zzww, IN[0].zwzw >175: MOV TEMP[19].xy, TEMP[1].xyyy >176: TEX TEMP[19], TEMP[19], SAMP[2], 2D >177: MOV TEMP[1].xy, TEMP[1].zwww >178: TEX TEMP[1], TEMP[1], SAMP[2], 2D >179: FMA TEMP[18], TEMP[18], IMM[8].xxyy, IN[0].zwzw >180: MOV TEMP[20].xy, TEMP[18].xyyy >181: TEX TEMP[20], TEMP[20], SAMP[2], 2D >182: MOV TEMP[18].xy, TEMP[18].zwww >183: TEX TEMP[18], TEMP[18], SAMP[2], 2D >184: USNE TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx >185: UIF TEMP[4].xxxx :0 >186: MUL TEMP[0], TEMP[7], IMM[8].zzzz >187: FMA TEMP[4], TEMP[5], IMM[8].zzzz, TEMP[0] >188: FMA TEMP[4], TEMP[9], IMM[8].zzzz, TEMP[4] >189: FMA TEMP[4], TEMP[10], IMM[8].zzzz, TEMP[4] >190: FMA TEMP[4], TEMP[11], IMM[8].zzzz, TEMP[4] >191: FMA TEMP[4], TEMP[12], IMM[8].zzzz, TEMP[4] >192: FMA TEMP[4], TEMP[13], IMM[8].zzzz, TEMP[4] >193: FMA TEMP[4], TEMP[14], IMM[8].zzzz, TEMP[4] >194: FMA TEMP[4], TEMP[15], IMM[8].zzzz, TEMP[4] >195: FMA TEMP[4], TEMP[16], IMM[8].zzzz, TEMP[4] >196: MOV TEMP[0].xyz, TEMP[4] >197: FMA TEMP[3], TEMP[3], IMM[8].zzzz, TEMP[4] >198: ELSE :0 >199: MOV TEMP[4].xy, IN[0].zwww >200: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D >201: MOV TEMP[0].xyz, TEMP[4].xyzx >202: DP3 TEMP[0].x, TEMP[4].xyzz, IMM[6].xyzz >203: MIN TEMP[4].xy, TEMP[17].ywww, TEMP[17].xzzz >204: MOV TEMP[0].yz, TEMP[4].yxyy >205: MIN TEMP[4].x, TEMP[4].yyyy, TEMP[4].xxxx >206: MOV TEMP[0].y, TEMP[4].xxxx >207: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx >208: MOV TEMP[0].y, TEMP[4].xxxx >209: MAX TEMP[4].xy, TEMP[17].ywww, TEMP[17].xzzz >210: MOV TEMP[0].z, TEMP[4].yyxy >211: MAX TEMP[4].x, TEMP[4].yyyy, TEMP[4].xxxx >212: MOV TEMP[0].z, TEMP[4].xxxx >213: MAX TEMP[0].x, TEMP[4].xxxx, TEMP[0].xxxx >214: ADD TEMP[2], TEMP[1], TEMP[19] >215: MUL TEMP[6], TEMP[2], IMM[3].zzzz >216: ADD TEMP[8], TEMP[18], TEMP[20] >217: MUL TEMP[8], TEMP[8], IMM[6].wwww >218: FMA TEMP[1], TEMP[2], IMM[6].wwww, TEMP[8] >219: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[6].xyzz >220: MOV TEMP[0].z, TEMP[2].xxxx >221: FSLT TEMP[0].xy, TEMP[0].xzzz, TEMP[0].zyyy >222: AND TEMP[0].xy, TEMP[0].xyyy, IMM[4].xxxx >223: INEG TEMP[0].xy, TEMP[0].xyyy >224: OR TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy >225: USNE TEMP[2].x, TEMP[0].xxxx, IMM[1].xxxx >226: UIF TEMP[2].xxxx :0 >227: MOV TEMP[2].x, TEMP[6].xxxx >228: ELSE :0 >229: MOV TEMP[2].x, TEMP[1].xxxx >230: ENDIF >231: MOV TEMP[2].x, TEMP[2].xxxx >232: USNE TEMP[4].x, TEMP[0].xxxx, IMM[1].xxxx >233: UIF TEMP[4].xxxx :0 >234: MOV TEMP[4].x, TEMP[6].yyyy >235: ELSE :0 >236: MOV TEMP[4].x, TEMP[1].yyyy >237: ENDIF >238: MOV TEMP[2].y, TEMP[4].xxxx >239: USNE TEMP[4].x, TEMP[0].xxxx, IMM[1].xxxx >240: UIF TEMP[4].xxxx :0 >241: MOV TEMP[4].x, TEMP[6].zzzz >242: ELSE :0 >243: MOV TEMP[4].x, TEMP[1].zzzz >244: ENDIF >245: MOV TEMP[2].z, TEMP[4].xxxx >246: USNE TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx >247: UIF TEMP[0].xxxx :0 >248: MOV TEMP[0].x, TEMP[6].wwww >249: ELSE :0 >250: MOV TEMP[0].x, TEMP[1].wwww >251: ENDIF >252: MOV TEMP[2].w, TEMP[0].xxxx >253: MOV TEMP[3], TEMP[2] >254: ENDIF >255: 
MOV OUT[0], TEMP[3] >256: END >radeonsi: Compiling shader 55 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 128) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 132) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 136) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 140) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 164) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 168) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 172) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 7 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = 
extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %78 = bitcast float %72 to i32 > %79 = bitcast float %73 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = fcmp une float %83, 0.000000e+00 > br i1 %84, label %IF, label %ELSE > >IF: ; preds = %main_body > %85 = fdiv float 1.000000e+00, %83 > %86 = fmul float %42, %85 > br label %ENDIF > >ELSE: ; preds = %main_body > %87 = fcmp ogt float %42, 0.000000e+00 > %88 = select i1 %87, float 1.000000e+00, float %42 > %89 = fcmp oge float %88, 0.000000e+00 > %.op = fmul float %88, 0x4600000000000000 > %90 = select i1 %89, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %86, %IF ], [ %90, %ELSE ] > %91 = fmul float %30, %70 > %92 = fmul float %31, %71 > %93 = fadd float %91, %92 > %94 = fmul float %32, %temp4.0 > %95 = fadd float %93, %94 > %96 = fadd float %95, %33 > %97 = fmul float %34, %70 > %98 = fmul float %35, %71 > %99 = fadd float %97, %98 > %100 = fmul float %36, %temp4.0 > %101 = fadd float %99, %100 > %102 = fadd float %101, %37 > %103 = fmul float %38, %70 > %104 = fmul float %39, %71 > %105 = fadd float %103, %104 > %106 = fmul float %40, %temp4.0 > %107 = fadd float %105, %106 > %108 = fadd float %107, %41 > %109 = fcmp oeq float %108, 0.000000e+00 > %110 = fcmp oeq float %108, 0.000000e+00 > %111 = fcmp ogt float %96, 0.000000e+00 > %112 = select i1 %111, float 1.000000e+00, float %96 > %113 = fcmp oge float %112, 0.000000e+00 > %114 = fcmp ogt float %102, 0.000000e+00 > %115 = select i1 %114, float 1.000000e+00, float %102 > %116 = fcmp oge float %115, 0.000000e+00 > %.op123 = fmul float %112, 0x4600000000000000 > %117 = select i1 %113, float %.op123, float 0xC600000000000000 > %.op124 = fmul float %115, 0x4600000000000000 > %118 = select i1 %116, float %.op124, float 0xC600000000000000 > %119 = fdiv float 1.000000e+00, %108 > %120 = fmul float %96, %119 > %121 = fmul float %102, %119 > %122 = select i1 %109, float %117, float %120 > %123 = select i1 %110, float %118, float %121 > %124 = fsub float %70, %122 > %125 = fsub float %71, %123 > %126 = fmul float %124, 0xBFC3333340000000 > %127 = fmul float %125, 0x3FC3333340000000 > %128 = fmul float %126, %25 > %129 = fmul float %127, %25 > %130 = fmul float %128, %26 > %131 = fmul float %129, %27 > %132 = fmul float %130, %130 > %133 = fmul float %131, %131 > %134 = fadd float %132, %133 > %135 = call float @llvm.sqrt.f32(float %134) > %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) > %137 = fmul float %136, %130 > %138 = fmul float %136, %131 > %139 = call 
float @llvm.minnum.f32(float %135, float 1.600000e+01) > %140 = fmul float %139, %137 > %141 = fmul float %139, %138 > %142 = fmul float %140, %28 > %143 = fmul float %141, %29 > %144 = bitcast float %72 to i32 > %145 = bitcast float %73 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 3 > %150 = fcmp olt float %149, 1.000000e+00 > %151 = fcmp ogt float %135, 5.000000e-01 > %152 = and i1 %150, %151 > %153 = fsub float -0.000000e+00, %140 > %154 = call float @llvm.fma.f32(float %153, float %28, float %72) > %155 = fsub float -0.000000e+00, %141 > %156 = call float @llvm.fma.f32(float %155, float %29, float %73) > %157 = bitcast float %154 to i32 > %158 = bitcast float %156 to i32 > %159 = insertelement <2 x i32> undef, i32 %157, i32 0 > %160 = insertelement <2 x i32> %159, i32 %158, i32 1 > %161 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %160, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %162 = extractelement <4 x float> %161, i32 0 > %163 = extractelement <4 x float> %161, i32 1 > %164 = extractelement <4 x float> %161, i32 2 > %165 = extractelement <4 x float> %161, i32 3 > %166 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %154) > %167 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %156) > %168 = bitcast float %166 to i32 > %169 = bitcast float %167 to i32 > %170 = insertelement <2 x i32> undef, i32 %168, i32 0 > %171 = insertelement <2 x i32> %170, i32 %169, i32 1 > %172 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %171, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %173 = extractelement <4 x float> %172, i32 0 > %174 = extractelement <4 x float> %172, i32 1 > %175 = extractelement <4 x float> %172, i32 2 > %176 = extractelement <4 x float> %172, i32 3 > %177 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %166) > %178 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %167) > %179 = bitcast float %177 to i32 > %180 = bitcast float %178 to i32 > %181 = insertelement <2 x i32> undef, i32 %179, i32 0 > %182 = insertelement <2 x i32> %181, i32 %180, i32 1 > %183 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %182, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %184 = extractelement <4 x float> %183, i32 0 > %185 = extractelement <4 x float> %183, i32 1 > %186 = extractelement <4 x float> %183, i32 2 > %187 = extractelement <4 x float> %183, i32 3 > %188 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %177) > %189 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %178) > %190 = bitcast float %188 to i32 > %191 = bitcast float %189 to i32 > %192 = insertelement <2 x i32> undef, i32 %190, i32 0 > %193 = insertelement <2 x i32> %192, i32 %191, i32 1 > %194 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %193, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %195 = extractelement <4 x float> %194, i32 0 > %196 = extractelement <4 x float> %194, i32 1 > %197 = extractelement <4 x float> %194, i32 2 > %198 = extractelement <4 x float> %194, i32 3 > %199 = call float @llvm.fma.f32(float 
%142, float 0x3FC99999A0000000, float %188) > %200 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %189) > %201 = bitcast float %199 to i32 > %202 = bitcast float %200 to i32 > %203 = insertelement <2 x i32> undef, i32 %201, i32 0 > %204 = insertelement <2 x i32> %203, i32 %202, i32 1 > %205 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %204, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %206 = extractelement <4 x float> %205, i32 0 > %207 = extractelement <4 x float> %205, i32 1 > %208 = extractelement <4 x float> %205, i32 2 > %209 = extractelement <4 x float> %205, i32 3 > %210 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %199) > %211 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %200) > %212 = bitcast float %210 to i32 > %213 = bitcast float %211 to i32 > %214 = insertelement <2 x i32> undef, i32 %212, i32 0 > %215 = insertelement <2 x i32> %214, i32 %213, i32 1 > %216 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %215, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %217 = extractelement <4 x float> %216, i32 0 > %218 = extractelement <4 x float> %216, i32 1 > %219 = extractelement <4 x float> %216, i32 2 > %220 = extractelement <4 x float> %216, i32 3 > %221 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %210) > %222 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %211) > %223 = bitcast float %221 to i32 > %224 = bitcast float %222 to i32 > %225 = insertelement <2 x i32> undef, i32 %223, i32 0 > %226 = insertelement <2 x i32> %225, i32 %224, i32 1 > %227 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %226, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %228 = extractelement <4 x float> %227, i32 0 > %229 = extractelement <4 x float> %227, i32 1 > %230 = extractelement <4 x float> %227, i32 2 > %231 = extractelement <4 x float> %227, i32 3 > %232 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %221) > %233 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %222) > %234 = bitcast float %232 to i32 > %235 = bitcast float %233 to i32 > %236 = insertelement <2 x i32> undef, i32 %234, i32 0 > %237 = insertelement <2 x i32> %236, i32 %235, i32 1 > %238 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %237, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %239 = extractelement <4 x float> %238, i32 0 > %240 = extractelement <4 x float> %238, i32 1 > %241 = extractelement <4 x float> %238, i32 2 > %242 = extractelement <4 x float> %238, i32 3 > %243 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %232) > %244 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %233) > %245 = bitcast float %243 to i32 > %246 = bitcast float %244 to i32 > %247 = insertelement <2 x i32> undef, i32 %245, i32 0 > %248 = insertelement <2 x i32> %247, i32 %246, i32 1 > %249 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %248, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %250 = extractelement <4 x float> %249, i32 0 > %251 = extractelement <4 x float> %249, i32 1 > %252 = extractelement <4 x float> %249, i32 2 > %253 = extractelement <4 x float> %249, i32 3 > %254 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %243) > %255 = call float 
@llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %244) > %256 = bitcast float %254 to i32 > %257 = bitcast float %255 to i32 > %258 = insertelement <2 x i32> undef, i32 %256, i32 0 > %259 = insertelement <2 x i32> %258, i32 %257, i32 1 > %260 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %259, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %261 = extractelement <4 x float> %260, i32 0 > %262 = extractelement <4 x float> %260, i32 1 > %263 = extractelement <4 x float> %260, i32 2 > %264 = extractelement <4 x float> %260, i32 3 > %265 = call float @llvm.fma.f32(float %142, float 0x3FC99999A0000000, float %254) > %266 = call float @llvm.fma.f32(float %143, float 0x3FC99999A0000000, float %255) > %267 = bitcast float %265 to i32 > %268 = bitcast float %266 to i32 > %269 = insertelement <2 x i32> undef, i32 %267, i32 0 > %270 = insertelement <2 x i32> %269, i32 %268, i32 1 > %271 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %270, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %272 = extractelement <4 x float> %271, i32 0 > %273 = extractelement <4 x float> %271, i32 1 > %274 = extractelement <4 x float> %271, i32 2 > %275 = extractelement <4 x float> %271, i32 3 > %276 = bitcast float %74 to i32 > %277 = bitcast float %75 to i32 > %278 = insertelement <2 x i32> undef, i32 %276, i32 0 > %279 = insertelement <2 x i32> %278, i32 %277, i32 1 > %280 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %279, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %281 = extractelement <4 x float> %280, i32 0 > %282 = fcmp une float %281, 0.000000e+00 > br i1 %282, label %IF93, label %ELSE94 > >IF93: ; preds = %ENDIF > %283 = fdiv float 1.000000e+00, %281 > %284 = fmul float %42, %283 > br label %ENDIF92 > >ELSE94: ; preds = %ENDIF > %285 = fcmp ogt float %42, 0.000000e+00 > %286 = select i1 %285, float 1.000000e+00, float %42 > %287 = fcmp oge float %286, 0.000000e+00 > %.op125 = fmul float %286, 0x4600000000000000 > %288 = select i1 %287, float %.op125, float 0xC600000000000000 > br label %ENDIF92 > >ENDIF92: ; preds = %ELSE94, %IF93 > %temp68.0 = phi float [ %284, %IF93 ], [ %288, %ELSE94 ] > %289 = bitcast float %76 to i32 > %290 = bitcast float %75 to i32 > %291 = insertelement <2 x i32> undef, i32 %289, i32 0 > %292 = insertelement <2 x i32> %291, i32 %290, i32 1 > %293 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %292, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %294 = extractelement <4 x float> %293, i32 0 > %295 = fcmp une float %294, 0.000000e+00 > br i1 %295, label %IF96, label %ELSE97 > >IF96: ; preds = %ENDIF92 > %296 = fdiv float 1.000000e+00, %294 > %297 = fmul float %42, %296 > br label %ENDIF95 > >ELSE97: ; preds = %ENDIF92 > %298 = fcmp ogt float %42, 0.000000e+00 > %299 = select i1 %298, float 1.000000e+00, float %42 > %300 = fcmp oge float %299, 0.000000e+00 > %.op126 = fmul float %299, 0x4600000000000000 > %301 = select i1 %300, float %.op126, float 0xC600000000000000 > br label %ENDIF95 > >ENDIF95: ; preds = %ELSE97, %IF96 > %temp68.1 = phi float [ %297, %IF96 ], [ %301, %ELSE97 ] > %302 = bitcast float %74 to i32 > %303 = bitcast float %77 to i32 > %304 = insertelement <2 x i32> undef, i32 %302, i32 0 > %305 = insertelement <2 x i32> %304, i32 %303, i32 1 > %306 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %305, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0) > %307 = extractelement <4 x float> %306, i32 0 > %308 = fcmp une float %307, 0.000000e+00 > br i1 %308, label %IF99, label %ELSE100 > >IF99: ; preds = %ENDIF95 > %309 = fdiv float 1.000000e+00, %307 > %310 = fmul float %42, %309 > br label %ENDIF98 > >ELSE100: ; preds = %ENDIF95 > %311 = fcmp ogt float %42, 0.000000e+00 > %312 = select i1 %311, float 1.000000e+00, float %42 > %313 = fcmp oge float %312, 0.000000e+00 > %.op127 = fmul float %312, 0x4600000000000000 > %314 = select i1 %313, float %.op127, float 0xC600000000000000 > br label %ENDIF98 > >ENDIF98: ; preds = %ELSE100, %IF99 > %temp72.0 = phi float [ %310, %IF99 ], [ %314, %ELSE100 ] > %315 = bitcast float %76 to i32 > %316 = bitcast float %77 to i32 > %317 = insertelement <2 x i32> undef, i32 %315, i32 0 > %318 = insertelement <2 x i32> %317, i32 %316, i32 1 > %319 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %318, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %320 = extractelement <4 x float> %319, i32 0 > %321 = fcmp une float %320, 0.000000e+00 > br i1 %321, label %IF102, label %ELSE103 > >IF102: ; preds = %ENDIF98 > %322 = fdiv float 1.000000e+00, %320 > %323 = fmul float %42, %322 > br label %ENDIF101 > >ELSE103: ; preds = %ENDIF98 > %324 = fcmp ogt float %42, 0.000000e+00 > %325 = select i1 %324, float 1.000000e+00, float %42 > %326 = fcmp oge float %325, 0.000000e+00 > %.op128 = fmul float %325, 0x4600000000000000 > %327 = select i1 %326, float %.op128, float 0xC600000000000000 > br label %ENDIF101 > >ENDIF101: ; preds = %ELSE103, %IF102 > %temp72.1 = phi float [ %323, %IF102 ], [ %327, %ELSE103 ] > %328 = fadd float %temp68.0, %temp72.1 > %329 = fadd float %temp68.1, %temp72.0 > %330 = fsub float -0.000000e+00, %temp4.0 > %331 = call float @llvm.fma.f32(float %330, float 2.000000e+00, float %328) > %332 = fsub float -0.000000e+00, %temp4.0 > %333 = call float @llvm.fma.f32(float %332, float 2.000000e+00, float %329) > %334 = call float @llvm.fabs.f32(float %333) > %335 = call float @llvm.fabs.f32(float %331) > %336 = fadd float %334, %335 > %337 = fsub float -0.000000e+00, %336 > %338 = call float @llvm.fma.f32(float %337, float 1.000000e+04, float 1.000000e+00) > %339 = call float @llvm.maxnum.f32(float %338, float 0.000000e+00) > %340 = call float @llvm.fma.f32(float %339, float 4.000000e+00, float 1.250000e-01) > %341 = bitcast float %74 to i32 > %342 = bitcast float %75 to i32 > %343 = insertelement <2 x i32> undef, i32 %341, i32 0 > %344 = insertelement <2 x i32> %343, i32 %342, i32 1 > %345 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %344, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %346 = extractelement <4 x float> %345, i32 0 > %347 = extractelement <4 x float> %345, i32 1 > %348 = extractelement <4 x float> %345, i32 2 > %349 = bitcast float %76 to i32 > %350 = bitcast float %75 to i32 > %351 = insertelement <2 x i32> undef, i32 %349, i32 0 > %352 = insertelement <2 x i32> %351, i32 %350, i32 1 > %353 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %352, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %354 = extractelement <4 x float> %353, i32 0 > %355 = extractelement <4 x float> %353, i32 1 > %356 = extractelement <4 x float> %353, i32 2 > %357 = bitcast float %74 to i32 > %358 = bitcast float %77 to i32 > %359 = insertelement <2 x i32> undef, i32 %357, i32 0 > %360 = insertelement <2 x i32> %359, i32 %358, i32 
1 > %361 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %360, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %362 = extractelement <4 x float> %361, i32 0 > %363 = extractelement <4 x float> %361, i32 1 > %364 = extractelement <4 x float> %361, i32 2 > %365 = bitcast float %76 to i32 > %366 = bitcast float %77 to i32 > %367 = insertelement <2 x i32> undef, i32 %365, i32 0 > %368 = insertelement <2 x i32> %367, i32 %366, i32 1 > %369 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %368, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %370 = extractelement <4 x float> %369, i32 0 > %371 = extractelement <4 x float> %369, i32 1 > %372 = extractelement <4 x float> %369, i32 2 > %373 = fmul float %346, 0x3FD322D0E0000000 > %374 = fmul float %347, 0x3FE2C8B440000000 > %375 = fadd float %374, %373 > %376 = fmul float %348, 0x3FBD2F1AA0000000 > %377 = fadd float %375, %376 > %378 = fmul float %354, 0x3FD322D0E0000000 > %379 = fmul float %355, 0x3FE2C8B440000000 > %380 = fadd float %379, %378 > %381 = fmul float %356, 0x3FBD2F1AA0000000 > %382 = fadd float %380, %381 > %383 = fmul float %362, 0x3FD322D0E0000000 > %384 = fmul float %363, 0x3FE2C8B440000000 > %385 = fadd float %384, %383 > %386 = fmul float %364, 0x3FBD2F1AA0000000 > %387 = fadd float %385, %386 > %388 = fmul float %370, 0x3FD322D0E0000000 > %389 = fmul float %371, 0x3FE2C8B440000000 > %390 = fadd float %389, %388 > %391 = fmul float %372, 0x3FBD2F1AA0000000 > %392 = fadd float %390, %391 > %393 = fadd float %382, %377 > %394 = fadd float %392, %387 > %395 = fadd float %387, %377 > %396 = fadd float %392, %382 > %397 = fsub float %393, %394 > %398 = fsub float %395, %396 > %399 = fsub float %395, %396 > %400 = fadd float %377, %382 > %401 = fadd float %400, %387 > %402 = fadd float %401, %392 > %403 = fmul float %402, %340 > %404 = fmul float %403, 2.500000e-01 > %405 = call float @llvm.maxnum.f32(float %404, float 3.906250e-03) > %406 = call float @llvm.fabs.f32(float %397) > %407 = call float @llvm.fabs.f32(float %399) > %408 = call float @llvm.minnum.f32(float %406, float %407) > %409 = fadd float %405, %408 > %410 = fcmp une float %409, 0.000000e+00 > %411 = fdiv float 1.000000e+00, %409 > %temp4.1 = select i1 %410, float %411, float 0x4600000000000000 > %412 = fmul float %397, %temp4.1 > %413 = fsub float -0.000000e+00, %412 > %414 = fmul float %temp4.1, %398 > %415 = fmul float %397, %temp4.1 > %416 = fsub float -0.000000e+00, %415 > %417 = fmul float %temp4.1, %399 > %418 = call float @llvm.maxnum.f32(float %413, float -2.000000e+00) > %419 = call float @llvm.maxnum.f32(float %414, float -2.000000e+00) > %420 = call float @llvm.maxnum.f32(float %416, float -2.000000e+00) > %421 = call float @llvm.maxnum.f32(float %417, float -2.000000e+00) > %422 = call float @llvm.minnum.f32(float %418, float 2.000000e+00) > %423 = call float @llvm.minnum.f32(float %419, float 2.000000e+00) > %424 = call float @llvm.minnum.f32(float %420, float 2.000000e+00) > %425 = call float @llvm.minnum.f32(float %421, float 2.000000e+00) > %426 = fmul float %422, %28 > %427 = fmul float %423, %29 > %428 = fmul float %424, %28 > %429 = fmul float %425, %29 > %430 = call float @llvm.fma.f32(float %428, float 0xBFC5555560000000, float %72) > %431 = call float @llvm.fma.f32(float %429, float 0xBFC5555560000000, float %73) > %432 = call float @llvm.fma.f32(float %428, float 0x3FC5555560000000, float %72) > %433 = call float @llvm.fma.f32(float %429, float 
0x3FC5555560000000, float %73) > %434 = bitcast float %430 to i32 > %435 = bitcast float %431 to i32 > %436 = insertelement <2 x i32> undef, i32 %434, i32 0 > %437 = insertelement <2 x i32> %436, i32 %435, i32 1 > %438 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %437, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %439 = bitcast float %432 to i32 > %440 = bitcast float %433 to i32 > %441 = insertelement <2 x i32> undef, i32 %439, i32 0 > %442 = insertelement <2 x i32> %441, i32 %440, i32 1 > %443 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %442, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %444 = call float @llvm.fma.f32(float %426, float -5.000000e-01, float %72) > %445 = call float @llvm.fma.f32(float %427, float -5.000000e-01, float %73) > %446 = call float @llvm.fma.f32(float %428, float 5.000000e-01, float %72) > %447 = call float @llvm.fma.f32(float %429, float 5.000000e-01, float %73) > %448 = bitcast float %444 to i32 > %449 = bitcast float %445 to i32 > %450 = insertelement <2 x i32> undef, i32 %448, i32 0 > %451 = insertelement <2 x i32> %450, i32 %449, i32 1 > %452 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %451, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %453 = bitcast float %446 to i32 > %454 = bitcast float %447 to i32 > %455 = insertelement <2 x i32> undef, i32 %453, i32 0 > %456 = insertelement <2 x i32> %455, i32 %454, i32 1 > %457 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %456, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > br i1 %152, label %IF108, label %ELSE109 > >IF108: ; preds = %ENDIF101 > %458 = fmul float %173, 0x3FB745D180000000 > %459 = fmul float %174, 0x3FB745D180000000 > %460 = fmul float %175, 0x3FB745D180000000 > %461 = fmul float %176, 0x3FB745D180000000 > %462 = call float @llvm.fma.f32(float %162, float 0x3FB745D180000000, float %458) > %463 = call float @llvm.fma.f32(float %163, float 0x3FB745D180000000, float %459) > %464 = call float @llvm.fma.f32(float %164, float 0x3FB745D180000000, float %460) > %465 = call float @llvm.fma.f32(float %165, float 0x3FB745D180000000, float %461) > %466 = call float @llvm.fma.f32(float %184, float 0x3FB745D180000000, float %462) > %467 = call float @llvm.fma.f32(float %185, float 0x3FB745D180000000, float %463) > %468 = call float @llvm.fma.f32(float %186, float 0x3FB745D180000000, float %464) > %469 = call float @llvm.fma.f32(float %187, float 0x3FB745D180000000, float %465) > %470 = call float @llvm.fma.f32(float %195, float 0x3FB745D180000000, float %466) > %471 = call float @llvm.fma.f32(float %196, float 0x3FB745D180000000, float %467) > %472 = call float @llvm.fma.f32(float %197, float 0x3FB745D180000000, float %468) > %473 = call float @llvm.fma.f32(float %198, float 0x3FB745D180000000, float %469) > %474 = call float @llvm.fma.f32(float %206, float 0x3FB745D180000000, float %470) > %475 = call float @llvm.fma.f32(float %207, float 0x3FB745D180000000, float %471) > %476 = call float @llvm.fma.f32(float %208, float 0x3FB745D180000000, float %472) > %477 = call float @llvm.fma.f32(float %209, float 0x3FB745D180000000, float %473) > %478 = call float @llvm.fma.f32(float %217, float 0x3FB745D180000000, float %474) > %479 = call float @llvm.fma.f32(float %218, float 0x3FB745D180000000, float %475) > %480 = call float @llvm.fma.f32(float %219, float 0x3FB745D180000000, float %476) > %481 = 
call float @llvm.fma.f32(float %220, float 0x3FB745D180000000, float %477) > %482 = call float @llvm.fma.f32(float %228, float 0x3FB745D180000000, float %478) > %483 = call float @llvm.fma.f32(float %229, float 0x3FB745D180000000, float %479) > %484 = call float @llvm.fma.f32(float %230, float 0x3FB745D180000000, float %480) > %485 = call float @llvm.fma.f32(float %231, float 0x3FB745D180000000, float %481) > %486 = call float @llvm.fma.f32(float %239, float 0x3FB745D180000000, float %482) > %487 = call float @llvm.fma.f32(float %240, float 0x3FB745D180000000, float %483) > %488 = call float @llvm.fma.f32(float %241, float 0x3FB745D180000000, float %484) > %489 = call float @llvm.fma.f32(float %242, float 0x3FB745D180000000, float %485) > %490 = call float @llvm.fma.f32(float %250, float 0x3FB745D180000000, float %486) > %491 = call float @llvm.fma.f32(float %251, float 0x3FB745D180000000, float %487) > %492 = call float @llvm.fma.f32(float %252, float 0x3FB745D180000000, float %488) > %493 = call float @llvm.fma.f32(float %253, float 0x3FB745D180000000, float %489) > %494 = call float @llvm.fma.f32(float %261, float 0x3FB745D180000000, float %490) > %495 = call float @llvm.fma.f32(float %262, float 0x3FB745D180000000, float %491) > %496 = call float @llvm.fma.f32(float %263, float 0x3FB745D180000000, float %492) > %497 = call float @llvm.fma.f32(float %264, float 0x3FB745D180000000, float %493) > %498 = call float @llvm.fma.f32(float %272, float 0x3FB745D180000000, float %494) > %499 = call float @llvm.fma.f32(float %273, float 0x3FB745D180000000, float %495) > %500 = call float @llvm.fma.f32(float %274, float 0x3FB745D180000000, float %496) > %501 = call float @llvm.fma.f32(float %275, float 0x3FB745D180000000, float %497) > br label %ENDIF107 > >ELSE109: ; preds = %ENDIF101 > %502 = extractelement <4 x float> %457, i32 3 > %503 = extractelement <4 x float> %457, i32 2 > %504 = extractelement <4 x float> %457, i32 1 > %505 = extractelement <4 x float> %457, i32 0 > %506 = extractelement <4 x float> %452, i32 3 > %507 = extractelement <4 x float> %452, i32 2 > %508 = extractelement <4 x float> %452, i32 1 > %509 = extractelement <4 x float> %452, i32 0 > %510 = extractelement <4 x float> %443, i32 3 > %511 = extractelement <4 x float> %443, i32 2 > %512 = extractelement <4 x float> %443, i32 1 > %513 = extractelement <4 x float> %443, i32 0 > %514 = extractelement <4 x float> %438, i32 3 > %515 = extractelement <4 x float> %438, i32 2 > %516 = extractelement <4 x float> %438, i32 1 > %517 = extractelement <4 x float> %438, i32 0 > %518 = bitcast float %72 to i32 > %519 = bitcast float %73 to i32 > %520 = insertelement <2 x i32> undef, i32 %518, i32 0 > %521 = insertelement <2 x i32> %520, i32 %519, i32 1 > %522 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %521, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %523 = extractelement <4 x float> %522, i32 0 > %524 = extractelement <4 x float> %522, i32 1 > %525 = extractelement <4 x float> %522, i32 2 > %526 = fmul float %523, 0x3FD322D0E0000000 > %527 = fmul float %524, 0x3FE2C8B440000000 > %528 = fadd float %527, %526 > %529 = fmul float %525, 0x3FBD2F1AA0000000 > %530 = fadd float %528, %529 > %531 = call float @llvm.minnum.f32(float %382, float %377) > %532 = call float @llvm.minnum.f32(float %392, float %387) > %533 = call float @llvm.minnum.f32(float %532, float %531) > %534 = call float @llvm.minnum.f32(float %533, float %530) > %535 = call float @llvm.maxnum.f32(float %382, float 
%377) > %536 = call float @llvm.maxnum.f32(float %392, float %387) > %537 = call float @llvm.maxnum.f32(float %536, float %535) > %538 = call float @llvm.maxnum.f32(float %537, float %530) > %539 = fadd float %513, %517 > %540 = fadd float %512, %516 > %541 = fadd float %511, %515 > %542 = fadd float %510, %514 > %543 = fmul float %539, 5.000000e-01 > %544 = fmul float %540, 5.000000e-01 > %545 = fmul float %541, 5.000000e-01 > %546 = fmul float %542, 5.000000e-01 > %547 = fadd float %505, %509 > %548 = fadd float %504, %508 > %549 = fadd float %503, %507 > %550 = fadd float %502, %506 > %551 = fmul float %547, 2.500000e-01 > %552 = fmul float %548, 2.500000e-01 > %553 = fmul float %549, 2.500000e-01 > %554 = fmul float %550, 2.500000e-01 > %555 = call float @llvm.fma.f32(float %539, float 2.500000e-01, float %551) > %556 = call float @llvm.fma.f32(float %540, float 2.500000e-01, float %552) > %557 = call float @llvm.fma.f32(float %541, float 2.500000e-01, float %553) > %558 = call float @llvm.fma.f32(float %542, float 2.500000e-01, float %554) > %559 = fmul float %555, 0x3FD322D0E0000000 > %560 = fmul float %556, 0x3FE2C8B440000000 > %561 = fadd float %560, %559 > %562 = fmul float %557, 0x3FBD2F1AA0000000 > %563 = fadd float %561, %562 > %564 = fcmp olt float %538, %563 > %565 = fcmp olt float %563, %534 > %566 = or i1 %564, %565 > %. = select i1 %566, float %543, float %555 > %temp16.0 = select i1 %566, float %544, float %556 > %.122 = select i1 %566, float %545, float %557 > %temp.0 = select i1 %566, float %546, float %558 > br label %ENDIF107 > >ENDIF107: ; preds = %ELSE109, %IF108 > %temp12.0 = phi float [ %498, %IF108 ], [ %., %ELSE109 ] > %temp13.0 = phi float [ %499, %IF108 ], [ %temp16.0, %ELSE109 ] > %temp14.0 = phi float [ %500, %IF108 ], [ %.122, %ELSE109 ] > %temp15.0 = phi float [ %501, %IF108 ], [ %temp.0, %ELSE109 ] > %567 = bitcast float %5 to i32 > %568 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %567, 10 > %569 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %568, float %temp12.0, 11 > %570 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %569, float %temp13.0, 12 > %571 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %570, float %temp14.0, 13 > %572 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %571, float %temp15.0, 14 > %573 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %572, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %573 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> 
@llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..6] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, -0.5000} >IMM[1] UINT32 {0, 96, 0, 0} >IMM[2] FLT32 {158456325028528675187087900672.0000, 0.0000, -1.0000, 1.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: FMA TEMP[1].xy, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 3: FSEQ TEMP[2].xy, CONST[1][6].xyyy, IMM[0].xxxx > 4: RCP TEMP[3].x, CONST[1][6].xxxx > 5: RCP TEMP[3].y, CONST[1][6].yyyy > 6: UCMP TEMP[2].xy, TEMP[2].xyyy, IMM[2].xxxx, TEMP[3].xyyy > 7: ADD TEMP[3].xy, -TEMP[2].xyyy, TEMP[1].xyyy > 8: FMA TEMP[4].xy, TEMP[2].xyyy, IMM[2].yzzz, TEMP[1].xyyy > 9: MOV TEMP[3].zw, TEMP[4].yyxy > 10: FMA TEMP[4].xy, TEMP[2].xyyy, IMM[2].wzzz, TEMP[1].xyyy > 11: FMA TEMP[5].xy, TEMP[2].xyyy, IMM[2].zyyy, TEMP[1].xyyy > 12: MOV TEMP[4].zw, TEMP[5].yyxy > 13: FMA TEMP[5].xyz, IN[0].xyyy, IMM[0].zwww, IMM[0].zzzz > 14: MOV TEMP[5].xyw, TEMP[5].xyxz > 15: ADD TEMP[6].x, TEMP[2].xxxx, TEMP[1].xxxx > 16: MOV TEMP[5].z, TEMP[6].xxxx > 17: FMA TEMP[6], TEMP[2].xyxy, IMM[2].zwyw, TEMP[1].xyxy > 18: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy > 19: MOV OUT[5], TEMP[1] > 20: MOV OUT[4], TEMP[6] > 21: MOV OUT[3], TEMP[5] > 22: MOV OUT[2], TEMP[4] > 23: MOV OUT[1], TEMP[3] > 24: MOV OUT[0], TEMP[0] > 25: END >radeonsi: Compiling shader 56 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %24 = call float 
@llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %25 = fcmp oeq float %16, 0.000000e+00 > %26 = fcmp oeq float %17, 0.000000e+00 > %27 = fdiv float 1.000000e+00, %16 > %28 = fdiv float 1.000000e+00, %17 > %29 = select i1 %25, float 0x4600000000000000, float %27 > %30 = select i1 %26, float 0x4600000000000000, float %28 > %31 = fsub float %23, %29 > %32 = fsub float %24, %30 > %33 = call float @llvm.fma.f32(float %29, float 0.000000e+00, float %23) > %34 = call float @llvm.fma.f32(float %30, float -1.000000e+00, float %24) > %35 = call float @llvm.fma.f32(float %29, float 1.000000e+00, float %23) > %36 = call float @llvm.fma.f32(float %30, float -1.000000e+00, float %24) > %37 = call float @llvm.fma.f32(float %29, float -1.000000e+00, float %23) > %38 = call float @llvm.fma.f32(float %30, float 0.000000e+00, float %24) > %39 = call float @llvm.fma.f32(float %21, float 5.000000e-01, float 5.000000e-01) > %40 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %41 = call float @llvm.fma.f32(float %22, float -5.000000e-01, float 5.000000e-01) > %42 = fadd float %29, %23 > %43 = call float @llvm.fma.f32(float %29, float -1.000000e+00, float %23) > %44 = call float @llvm.fma.f32(float %30, float 1.000000e+00, float %24) > %45 = call float @llvm.fma.f32(float %29, float 0.000000e+00, float %23) > %46 = call float @llvm.fma.f32(float %30, float 1.000000e+00, float %24) > %47 = fadd float %29, %23 > %48 = fadd float %30, %24 > %49 = bitcast i32 %11 to float > %50 = insertvalue <{ float, float, float }> undef, float %49, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %31, float %32, float %33, float %34) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float %37, float %38) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %39, float %40, float %42, float %41) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %43, float %44, float %45, float %46) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %47, float %48, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %50 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..9], LOCAL >IMM[0] INT32 {1, 0, 0, 0} >IMM[1] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[2].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: MOV TEMP[1].x, TEMP[0].xxxx > 3: MOV TEMP[2].xy, IN[2].zwww > 4: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 
> 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: MIN TEMP[3].x, TEMP[0].xxxx, TEMP[2].xxxx > 7: MOV TEMP[4].xy, IN[1].zwww > 8: TEX TEMP[4].x, TEMP[4], SAMP[0], 2D > 9: FSGE TEMP[5].x, TEMP[4].xxxx, TEMP[2].xxxx > 10: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx > 11: INEG TEMP[5].x, TEMP[5].xxxx > 12: USNE TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx > 13: UIF TEMP[5].xxxx :0 > 14: MOV TEMP[5].x, TEMP[4].xxxx > 15: ELSE :0 > 16: MOV TEMP[5].x, TEMP[3].xxxx > 17: ENDIF > 18: MAX TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx > 19: FSLT TEMP[2].xy, TEMP[4].xxxx, TEMP[1].xyyy > 20: AND TEMP[2].xy, TEMP[2].xyyy, IMM[0].xxxx > 21: INEG TEMP[2].xy, TEMP[2].xyyy > 22: USNE TEMP[3].x, TEMP[2].yyyy, IMM[1].xxxx > 23: UIF TEMP[3].xxxx :0 > 24: MOV TEMP[3].x, TEMP[4].xxxx > 25: ELSE :0 > 26: MOV TEMP[3].x, TEMP[0].xxxx > 27: ENDIF > 28: USNE TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx > 29: UIF TEMP[2].xxxx :0 > 30: MOV TEMP[2].x, TEMP[5].xxxx > 31: ELSE :0 > 32: MOV TEMP[2].x, TEMP[3].xxxx > 33: ENDIF > 34: MOV TEMP[1].x, TEMP[2].xxxx > 35: MOV TEMP[3].xy, IN[3].zwww > 36: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D > 37: MOV TEMP[1].y, TEMP[3].xxxx > 38: MOV TEMP[4].xy, IN[4].xyyy > 39: TEX TEMP[4].x, TEMP[4], SAMP[0], 2D > 40: MOV TEMP[1].z, TEMP[4].xxxx > 41: MIN TEMP[5].x, TEMP[3].xxxx, TEMP[4].xxxx > 42: MOV TEMP[6].xy, IN[3].xyyy > 43: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D > 44: FSGE TEMP[7].x, TEMP[6].xxxx, TEMP[4].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: USNE TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx > 48: UIF TEMP[7].xxxx :0 > 49: MOV TEMP[7].x, TEMP[6].xxxx > 50: ELSE :0 > 51: MOV TEMP[7].x, TEMP[5].xxxx > 52: ENDIF > 53: MAX TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx > 54: FSLT TEMP[4].xy, TEMP[6].xxxx, TEMP[1].yzzz > 55: AND TEMP[4].xy, TEMP[4].xyyy, IMM[0].xxxx > 56: INEG TEMP[4].xy, TEMP[4].xyyy > 57: USNE TEMP[5].x, TEMP[4].yyyy, IMM[1].xxxx > 58: UIF TEMP[5].xxxx :0 > 59: MOV TEMP[5].x, TEMP[6].xxxx > 60: ELSE :0 > 61: MOV TEMP[5].x, TEMP[3].xxxx > 62: ENDIF > 63: USNE TEMP[3].x, TEMP[4].xxxx, IMM[1].xxxx > 64: UIF TEMP[3].xxxx :0 > 65: MOV TEMP[3].x, TEMP[7].xxxx > 66: ELSE :0 > 67: MOV TEMP[3].x, TEMP[5].xxxx > 68: ENDIF > 69: MOV TEMP[1].y, TEMP[3].xxxx > 70: MIN TEMP[4].x, TEMP[2].xxxx, TEMP[3].xxxx > 71: MOV TEMP[5].xy, IN[0].zwww > 72: TEX TEMP[5].x, TEMP[5], SAMP[0], 2D > 73: MOV TEMP[6].xy, IN[1].xyyy > 74: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D > 75: MIN TEMP[7].x, TEMP[5].xxxx, TEMP[6].xxxx > 76: MOV TEMP[8].xy, IN[0].xyyy > 77: TEX TEMP[8].x, TEMP[8], SAMP[0], 2D > 78: FSGE TEMP[9].x, TEMP[8].xxxx, TEMP[6].xxxx > 79: AND TEMP[9].x, TEMP[9].xxxx, IMM[0].xxxx > 80: INEG TEMP[9].x, TEMP[9].xxxx > 81: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx > 82: UIF TEMP[9].xxxx :0 > 83: MOV TEMP[9].x, TEMP[8].xxxx > 84: ELSE :0 > 85: MOV TEMP[9].x, TEMP[7].xxxx > 86: ENDIF > 87: MAX TEMP[7].x, TEMP[5].xxxx, TEMP[6].xxxx > 88: FSLT TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx > 89: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx > 90: INEG TEMP[5].x, TEMP[5].xxxx > 91: FSLT TEMP[6].x, TEMP[8].xxxx, TEMP[6].xxxx > 92: AND TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx > 93: INEG TEMP[6].x, TEMP[6].xxxx > 94: USNE TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx > 95: UIF TEMP[6].xxxx :0 > 96: MOV TEMP[6].x, TEMP[8].xxxx > 97: ELSE :0 > 98: MOV TEMP[6].x, TEMP[7].xxxx > 99: ENDIF >100: USNE TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx >101: UIF TEMP[5].xxxx :0 >102: MOV TEMP[5].x, TEMP[9].xxxx >103: ELSE :0 >104: MOV TEMP[5].x, TEMP[6].xxxx >105: ENDIF >106: FSGE TEMP[6].x, TEMP[5].xxxx, TEMP[3].xxxx >107: AND TEMP[6].x, TEMP[6].xxxx, 
IMM[0].xxxx >108: INEG TEMP[6].x, TEMP[6].xxxx >109: USNE TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx >110: UIF TEMP[6].xxxx :0 >111: MOV TEMP[6].x, TEMP[5].xxxx >112: ELSE :0 >113: MOV TEMP[6].x, TEMP[4].xxxx >114: ENDIF >115: MAX TEMP[0].x, TEMP[3].xxxx, TEMP[2].xxxx >116: FSLT TEMP[1].xy, TEMP[5].xxxx, TEMP[1].xyyy >117: AND TEMP[1].xy, TEMP[1].xyyy, IMM[0].xxxx >118: INEG TEMP[1].xy, TEMP[1].xyyy >119: USNE TEMP[2].x, TEMP[1].yyyy, IMM[1].xxxx >120: UIF TEMP[2].xxxx :0 >121: MOV TEMP[2].x, TEMP[5].xxxx >122: ELSE :0 >123: MOV TEMP[2].x, TEMP[0].xxxx >124: ENDIF >125: USNE TEMP[0].x, TEMP[1].xxxx, IMM[1].xxxx >126: UIF TEMP[0].xxxx :0 >127: MOV TEMP[0].x, TEMP[6].xxxx >128: ELSE :0 >129: MOV TEMP[0].x, TEMP[2].xxxx >130: ENDIF >131: MOV TEMP[0].x, TEMP[0].xxxx >132: USNE TEMP[3].x, TEMP[1].xxxx, IMM[1].xxxx >133: UIF TEMP[3].xxxx :0 >134: MOV TEMP[3].x, TEMP[6].xxxx >135: ELSE :0 >136: MOV TEMP[3].x, TEMP[2].xxxx >137: ENDIF >138: MOV TEMP[0].y, TEMP[3].xxxx >139: USNE TEMP[3].x, TEMP[1].xxxx, IMM[1].xxxx >140: UIF TEMP[3].xxxx :0 >141: MOV TEMP[3].x, TEMP[6].xxxx >142: ELSE :0 >143: MOV TEMP[3].x, TEMP[2].xxxx >144: ENDIF >145: MOV TEMP[0].z, TEMP[3].xxxx >146: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >147: UIF TEMP[1].xxxx :0 >148: MOV TEMP[1].x, TEMP[6].xxxx >149: ELSE :0 >150: MOV TEMP[1].x, TEMP[2].xxxx >151: ENDIF >152: MOV TEMP[0].w, TEMP[1].xxxx >153: MOV OUT[0], TEMP[0] >154: END >radeonsi: Compiling shader 57 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %44 = 
call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %50 = bitcast float %40 to i32 > %51 = bitcast float %41 to i32 > %52 = insertelement <2 x i32> undef, i32 %50, i32 0 > %53 = insertelement <2 x i32> %52, i32 %51, i32 1 > %54 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %53, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %55 = extractelement <4 x float> %54, i32 0 > %56 = bitcast float %42 to i32 > %57 = bitcast float %43 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = call float @llvm.minnum.f32(float %55, float %61) > %63 = bitcast float %38 to i32 > %64 = bitcast float %39 to i32 > %65 = insertelement <2 x i32> undef, i32 %63, i32 0 > %66 = insertelement <2 x i32> %65, i32 %64, i32 1 > %67 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %66, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %68 = extractelement <4 x float> %67, i32 0 > %69 = fcmp oge float %68, %61 > %. = select i1 %69, float %68, float %62 > %70 = call float @llvm.maxnum.f32(float %61, float %55) > %71 = fcmp olt float %68, %55 > %72 = fcmp olt float %68, %61 > %temp12.0 = select i1 %72, float %68, float %70 > %..temp12.0 = select i1 %71, float %., float %temp12.0 > %73 = bitcast float %46 to i32 > %74 = bitcast float %47 to i32 > %75 = insertelement <2 x i32> undef, i32 %73, i32 0 > %76 = insertelement <2 x i32> %75, i32 %74, i32 1 > %77 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %76, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %78 = extractelement <4 x float> %77, i32 0 > %79 = bitcast float %48 to i32 > %80 = bitcast float %49 to i32 > %81 = insertelement <2 x i32> undef, i32 %79, i32 0 > %82 = insertelement <2 x i32> %81, i32 %80, i32 1 > %83 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %82, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %84 = extractelement <4 x float> %83, i32 0 > %85 = call float @llvm.minnum.f32(float %78, float %84) > %86 = bitcast float %44 to i32 > %87 = bitcast float %45 to i32 > %88 = insertelement <2 x i32> undef, i32 %86, i32 0 > %89 = insertelement <2 x i32> %88, i32 %87, i32 1 > %90 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %89, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %91 = extractelement <4 x float> %90, i32 0 > %92 = fcmp oge float %91, %84 > %temp28.0 = select i1 %92, float %91, float %85 > %93 = call float @llvm.maxnum.f32(float %84, float %78) > %94 = fcmp olt float %91, %78 > %95 = fcmp olt float %91, %84 > %.82 = select i1 %95, float %91, float %93 > %temp12.1 = select i1 %94, float %temp28.0, float %.82 > %96 = call float @llvm.minnum.f32(float %..temp12.0, float %temp12.1) > %97 = bitcast float %34 to i32 > %98 = bitcast float %35 to i32 > %99 = 
insertelement <2 x i32> undef, i32 %97, i32 0 > %100 = insertelement <2 x i32> %99, i32 %98, i32 1 > %101 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %100, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %102 = extractelement <4 x float> %101, i32 0 > %103 = bitcast float %36 to i32 > %104 = bitcast float %37 to i32 > %105 = insertelement <2 x i32> undef, i32 %103, i32 0 > %106 = insertelement <2 x i32> %105, i32 %104, i32 1 > %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %108 = extractelement <4 x float> %107, i32 0 > %109 = call float @llvm.minnum.f32(float %102, float %108) > %110 = bitcast float %32 to i32 > %111 = bitcast float %33 to i32 > %112 = insertelement <2 x i32> undef, i32 %110, i32 0 > %113 = insertelement <2 x i32> %112, i32 %111, i32 1 > %114 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %113, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %115 = extractelement <4 x float> %114, i32 0 > %116 = fcmp oge float %115, %108 > %.83 = select i1 %116, float %115, float %109 > %117 = call float @llvm.maxnum.f32(float %102, float %108) > %118 = fcmp olt float %115, %102 > %119 = fcmp olt float %115, %108 > %temp24.0 = select i1 %119, float %115, float %117 > %.83.temp24.0 = select i1 %118, float %.83, float %temp24.0 > %120 = fcmp oge float %.83.temp24.0, %temp12.1 > %temp24.1 = select i1 %120, float %.83.temp24.0, float %96 > %121 = call float @llvm.maxnum.f32(float %temp12.1, float %..temp12.0) > %122 = fcmp olt float %.83.temp24.0, %..temp12.0 > %123 = fcmp olt float %.83.temp24.0, %temp12.1 > %.83.temp24.0. = select i1 %123, float %.83.temp24.0, float %121 > %temp.0 = select i1 %122, float %temp24.1, float %.83.temp24.0. > %temp24.1..83.temp24.0. = select i1 %122, float %temp24.1, float %.83.temp24.0. > %temp12.3 = select i1 %122, float %temp24.1, float %.83.temp24.0. > %temp24.1..83.temp24.0.84 = select i1 %122, float %temp24.1, float %.83.temp24.0. 
> %124 = bitcast float %5 to i32 > %125 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %124, 10 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %125, float %temp.0, 11 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float %temp24.1..83.temp24.0., 12 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %temp12.3, 13 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %temp24.1..83.temp24.0.84, 14 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..8], LOCAL >IMM[0] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 80, 0, 0} > 0: MOV TEMP[0].xy, IN[2].xyyy > 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 2: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[0].xyzz > 3: MOV TEMP[0].x, TEMP[1].xxxx > 4: MOV TEMP[2].xy, IN[2].zwww > 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 6: DP3 TEMP[2].x, TEMP[2].xyzz, IMM[0].xyzz > 7: MOV TEMP[0].y, TEMP[2].xxxx > 8: FSLT TEMP[3].x, TEMP[1].xxxx, TEMP[2].xxxx > 9: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx > 10: INEG TEMP[3].x, TEMP[3].xxxx > 11: USNE TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx > 12: UIF TEMP[3].xxxx :0 > 13: MOV TEMP[3].x, TEMP[1].xxxx > 14: ELSE :0 > 15: MOV TEMP[3].x, TEMP[2].xxxx > 16: ENDIF > 17: MOV TEMP[4].xy, IN[1].zwww > 18: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D > 19: DP3 TEMP[4].x, TEMP[4].xyzz, IMM[0].xyzz > 20: FSGE TEMP[5].x, TEMP[4].xxxx, TEMP[2].xxxx > 21: AND TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx > 22: INEG TEMP[5].x, 
TEMP[5].xxxx > 23: USNE TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx > 24: UIF TEMP[5].xxxx :0 > 25: MOV TEMP[5].x, TEMP[4].xxxx > 26: ELSE :0 > 27: MOV TEMP[5].x, TEMP[3].xxxx > 28: ENDIF > 29: FSGE TEMP[3].x, TEMP[1].xxxx, TEMP[2].xxxx > 30: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx > 31: INEG TEMP[3].x, TEMP[3].xxxx > 32: USNE TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx > 33: UIF TEMP[3].xxxx :0 > 34: MOV TEMP[1].x, TEMP[1].xxxx > 35: ELSE :0 > 36: MOV TEMP[1].x, TEMP[2].xxxx > 37: ENDIF > 38: FSLT TEMP[2].xy, TEMP[4].xxxx, TEMP[0].xyyy > 39: AND TEMP[2].xy, TEMP[2].xyyy, IMM[1].xxxx > 40: INEG TEMP[2].xy, TEMP[2].xyyy > 41: USNE TEMP[3].x, TEMP[2].yyyy, IMM[2].xxxx > 42: UIF TEMP[3].xxxx :0 > 43: MOV TEMP[3].x, TEMP[4].xxxx > 44: ELSE :0 > 45: MOV TEMP[3].x, TEMP[1].xxxx > 46: ENDIF > 47: USNE TEMP[1].x, TEMP[2].xxxx, IMM[2].xxxx > 48: UIF TEMP[1].xxxx :0 > 49: MOV TEMP[1].x, TEMP[5].xxxx > 50: ELSE :0 > 51: MOV TEMP[1].x, TEMP[3].xxxx > 52: ENDIF > 53: MOV TEMP[0].x, TEMP[1].xxxx > 54: MOV TEMP[2].xy, IN[3].zwww > 55: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 56: DP3 TEMP[2].x, TEMP[2].xyzz, IMM[0].xyzz > 57: MOV TEMP[0].y, TEMP[2].xxxx > 58: MOV TEMP[3].xy, IN[4].xyyy > 59: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D > 60: DP3 TEMP[3].x, TEMP[3].xyzz, IMM[0].xyzz > 61: MOV TEMP[0].z, TEMP[3].xxxx > 62: FSLT TEMP[4].x, TEMP[2].xxxx, TEMP[3].xxxx > 63: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx > 64: INEG TEMP[4].x, TEMP[4].xxxx > 65: USNE TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx > 66: UIF TEMP[4].xxxx :0 > 67: MOV TEMP[4].x, TEMP[2].xxxx > 68: ELSE :0 > 69: MOV TEMP[4].x, TEMP[3].xxxx > 70: ENDIF > 71: MOV TEMP[5].xy, IN[3].xyyy > 72: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D > 73: DP3 TEMP[5].x, TEMP[5].xyzz, IMM[0].xyzz > 74: FSGE TEMP[6].x, TEMP[5].xxxx, TEMP[3].xxxx > 75: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx > 76: INEG TEMP[6].x, TEMP[6].xxxx > 77: USNE TEMP[6].x, TEMP[6].xxxx, IMM[2].xxxx > 78: UIF TEMP[6].xxxx :0 > 79: MOV TEMP[6].x, TEMP[5].xxxx > 80: ELSE :0 > 81: MOV TEMP[6].x, TEMP[4].xxxx > 82: ENDIF > 83: FSGE TEMP[4].x, TEMP[2].xxxx, TEMP[3].xxxx > 84: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx > 85: INEG TEMP[4].x, TEMP[4].xxxx > 86: USNE TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx > 87: UIF TEMP[4].xxxx :0 > 88: MOV TEMP[2].x, TEMP[2].xxxx > 89: ELSE :0 > 90: MOV TEMP[2].x, TEMP[3].xxxx > 91: ENDIF > 92: FSLT TEMP[3].xy, TEMP[5].xxxx, TEMP[0].yzzz > 93: AND TEMP[3].xy, TEMP[3].xyyy, IMM[1].xxxx > 94: INEG TEMP[3].xy, TEMP[3].xyyy > 95: USNE TEMP[4].x, TEMP[3].yyyy, IMM[2].xxxx > 96: UIF TEMP[4].xxxx :0 > 97: MOV TEMP[4].x, TEMP[5].xxxx > 98: ELSE :0 > 99: MOV TEMP[4].x, TEMP[2].xxxx >100: ENDIF >101: USNE TEMP[2].x, TEMP[3].xxxx, IMM[2].xxxx >102: UIF TEMP[2].xxxx :0 >103: MOV TEMP[2].x, TEMP[6].xxxx >104: ELSE :0 >105: MOV TEMP[2].x, TEMP[4].xxxx >106: ENDIF >107: MOV TEMP[0].y, TEMP[2].xxxx >108: FSLT TEMP[3].x, TEMP[1].xxxx, TEMP[2].xxxx >109: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx >110: INEG TEMP[3].x, TEMP[3].xxxx >111: USNE TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx >112: UIF TEMP[3].xxxx :0 >113: MOV TEMP[3].x, TEMP[1].xxxx >114: ELSE :0 >115: MOV TEMP[3].x, TEMP[2].xxxx >116: ENDIF >117: MOV TEMP[4].xy, IN[0].zwww >118: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D >119: DP3 TEMP[4].x, TEMP[4].xyzz, IMM[0].xyzz >120: MOV TEMP[5].xy, IN[1].xyyy >121: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D >122: DP3 TEMP[5].x, TEMP[5].xyzz, IMM[0].xyzz >123: FSLT TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx >124: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx >125: INEG TEMP[6].x, TEMP[6].xxxx >126: USNE TEMP[6].x, TEMP[6].xxxx, IMM[2].xxxx 
>127: UIF TEMP[6].xxxx :0 >128: MOV TEMP[6].x, TEMP[4].xxxx >129: ELSE :0 >130: MOV TEMP[6].x, TEMP[5].xxxx >131: ENDIF >132: MOV TEMP[7].xy, IN[0].xyyy >133: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D >134: DP3 TEMP[7].x, TEMP[7].xyzz, IMM[0].xyzz >135: FSGE TEMP[8].x, TEMP[7].xxxx, TEMP[5].xxxx >136: AND TEMP[8].x, TEMP[8].xxxx, IMM[1].xxxx >137: INEG TEMP[8].x, TEMP[8].xxxx >138: USNE TEMP[8].x, TEMP[8].xxxx, IMM[2].xxxx >139: UIF TEMP[8].xxxx :0 >140: MOV TEMP[8].x, TEMP[7].xxxx >141: ELSE :0 >142: MOV TEMP[8].x, TEMP[6].xxxx >143: ENDIF >144: FSGE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx >145: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx >146: INEG TEMP[6].x, TEMP[6].xxxx >147: USNE TEMP[6].x, TEMP[6].xxxx, IMM[2].xxxx >148: UIF TEMP[6].xxxx :0 >149: MOV TEMP[6].x, TEMP[4].xxxx >150: ELSE :0 >151: MOV TEMP[6].x, TEMP[5].xxxx >152: ENDIF >153: FSLT TEMP[4].x, TEMP[7].xxxx, TEMP[4].xxxx >154: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx >155: INEG TEMP[4].x, TEMP[4].xxxx >156: FSLT TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx >157: AND TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx >158: INEG TEMP[5].x, TEMP[5].xxxx >159: USNE TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx >160: UIF TEMP[5].xxxx :0 >161: MOV TEMP[5].x, TEMP[7].xxxx >162: ELSE :0 >163: MOV TEMP[5].x, TEMP[6].xxxx >164: ENDIF >165: USNE TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx >166: UIF TEMP[4].xxxx :0 >167: MOV TEMP[4].x, TEMP[8].xxxx >168: ELSE :0 >169: MOV TEMP[4].x, TEMP[5].xxxx >170: ENDIF >171: FSGE TEMP[5].x, TEMP[4].xxxx, TEMP[2].xxxx >172: AND TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx >173: INEG TEMP[5].x, TEMP[5].xxxx >174: USNE TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx >175: UIF TEMP[5].xxxx :0 >176: MOV TEMP[5].x, TEMP[4].xxxx >177: ELSE :0 >178: MOV TEMP[5].x, TEMP[3].xxxx >179: ENDIF >180: FSGE TEMP[3].x, TEMP[1].xxxx, TEMP[2].xxxx >181: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx >182: INEG TEMP[3].x, TEMP[3].xxxx >183: USNE TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx >184: UIF TEMP[3].xxxx :0 >185: MOV TEMP[1].x, TEMP[1].xxxx >186: ELSE :0 >187: MOV TEMP[1].x, TEMP[2].xxxx >188: ENDIF >189: FSLT TEMP[0].xy, TEMP[4].xxxx, TEMP[0].xyyy >190: AND TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx >191: INEG TEMP[0].xy, TEMP[0].xyyy >192: USNE TEMP[2].x, TEMP[0].yyyy, IMM[2].xxxx >193: UIF TEMP[2].xxxx :0 >194: MOV TEMP[2].x, TEMP[4].xxxx >195: ELSE :0 >196: MOV TEMP[2].x, TEMP[1].xxxx >197: ENDIF >198: USNE TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx >199: UIF TEMP[0].xxxx :0 >200: MOV TEMP[0].x, TEMP[5].xxxx >201: ELSE :0 >202: MOV TEMP[0].x, TEMP[2].xxxx >203: ENDIF >204: MIN TEMP[0], TEMP[0].xxxx, CONST[1][5].zzzz >205: MOV OUT[0], TEMP[0] >206: END >radeonsi: Compiling shader 58 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %26 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] 
addrspace(2)* %2, i64 0, i64 0 > %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 > %28 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %29 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %28, i64 0, i64 3 > %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 > %31 = extractelement <8 x i32> %27, i32 7 > %32 = extractelement <4 x i32> %30, i32 0 > %33 = and i32 %32, %31 > %34 = insertelement <4 x i32> %30, i32 %33, i32 0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %53 = bitcast float %43 to i32 > %54 = bitcast float %44 to i32 > %55 = insertelement <2 x i32> undef, i32 %53, i32 0 > %56 = insertelement <2 x i32> %55, i32 %54, i32 1 > %57 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %56, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %58 = extractelement <4 x float> %57, i32 0 > %59 = extractelement <4 x float> %57, i32 1 > %60 = extractelement <4 x float> %57, i32 2 > %61 = fmul float %58, 0x3FD3333340000000 > %62 = fmul float %59, 0x3FE2E147A0000000 > %63 = fadd float %62, %61 > %64 = fmul float %60, 0x3FBC28F5C0000000 > %65 = fadd float %63, %64 > %66 = bitcast float %45 to i32 > %67 = bitcast float %46 to i32 > %68 = insertelement <2 x i32> undef, i32 %66, i32 0 > %69 = insertelement <2 x i32> %68, i32 %67, i32 1 > %70 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %69, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = fmul float %71, 0x3FD3333340000000 > %75 = fmul float %72, 0x3FE2E147A0000000 > %76 = fadd float %75, %74 > %77 = fmul float %73, 0x3FBC28F5C0000000 > %78 = fadd float %76, %77 > %79 = fcmp olt float %65, %78 > %. 
= select i1 %79, float %65, float %78 > %80 = bitcast float %41 to i32 > %81 = bitcast float %42 to i32 > %82 = insertelement <2 x i32> undef, i32 %80, i32 0 > %83 = insertelement <2 x i32> %82, i32 %81, i32 1 > %84 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %83, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = fmul float %85, 0x3FD3333340000000 > %89 = fmul float %86, 0x3FE2E147A0000000 > %90 = fadd float %89, %88 > %91 = fmul float %87, 0x3FBC28F5C0000000 > %92 = fadd float %90, %91 > %93 = fcmp oge float %92, %78 > %temp20.0 = select i1 %93, float %92, float %. > %94 = fcmp oge float %65, %78 > %.93 = select i1 %94, float %65, float %78 > %95 = fcmp olt float %92, %65 > %96 = fcmp olt float %92, %78 > %temp12.1 = select i1 %96, float %92, float %.93 > %temp20.0.temp12.1 = select i1 %95, float %temp20.0, float %temp12.1 > %97 = bitcast float %49 to i32 > %98 = bitcast float %50 to i32 > %99 = insertelement <2 x i32> undef, i32 %97, i32 0 > %100 = insertelement <2 x i32> %99, i32 %98, i32 1 > %101 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %100, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %102 = extractelement <4 x float> %101, i32 0 > %103 = extractelement <4 x float> %101, i32 1 > %104 = extractelement <4 x float> %101, i32 2 > %105 = fmul float %102, 0x3FD3333340000000 > %106 = fmul float %103, 0x3FE2E147A0000000 > %107 = fadd float %106, %105 > %108 = fmul float %104, 0x3FBC28F5C0000000 > %109 = fadd float %107, %108 > %110 = bitcast float %51 to i32 > %111 = bitcast float %52 to i32 > %112 = insertelement <2 x i32> undef, i32 %110, i32 0 > %113 = insertelement <2 x i32> %112, i32 %111, i32 1 > %114 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %113, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %115 = extractelement <4 x float> %114, i32 0 > %116 = extractelement <4 x float> %114, i32 1 > %117 = extractelement <4 x float> %114, i32 2 > %118 = fmul float %115, 0x3FD3333340000000 > %119 = fmul float %116, 0x3FE2E147A0000000 > %120 = fadd float %119, %118 > %121 = fmul float %117, 0x3FBC28F5C0000000 > %122 = fadd float %120, %121 > %123 = fcmp olt float %109, %122 > %temp16.0 = select i1 %123, float %109, float %122 > %124 = bitcast float %47 to i32 > %125 = bitcast float %48 to i32 > %126 = insertelement <2 x i32> undef, i32 %124, i32 0 > %127 = insertelement <2 x i32> %126, i32 %125, i32 1 > %128 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %127, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %129 = extractelement <4 x float> %128, i32 0 > %130 = extractelement <4 x float> %128, i32 1 > %131 = extractelement <4 x float> %128, i32 2 > %132 = fmul float %129, 0x3FD3333340000000 > %133 = fmul float %130, 0x3FE2E147A0000000 > %134 = fadd float %133, %132 > %135 = fmul float %131, 0x3FBC28F5C0000000 > %136 = fadd float %134, %135 > %137 = fcmp oge float %136, %122 > %.temp16.0 = select i1 %137, float %136, float %temp16.0 > %138 = fcmp oge float %109, %122 > %temp8.0 = select i1 %138, float %109, float %122 > %139 = fcmp olt float %136, %109 > %140 = fcmp olt float %136, %122 > %.temp8.0 = select i1 %140, float %136, float %temp8.0 > %temp8.1 = select i1 %139, float %.temp16.0, float %.temp8.0 > %141 = fcmp olt float 
%temp20.0.temp12.1, %temp8.1 > %temp20.0.temp12.1.temp8.1 = select i1 %141, float %temp20.0.temp12.1, float %temp8.1 > %142 = bitcast float %37 to i32 > %143 = bitcast float %38 to i32 > %144 = insertelement <2 x i32> undef, i32 %142, i32 0 > %145 = insertelement <2 x i32> %144, i32 %143, i32 1 > %146 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %145, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %147 = extractelement <4 x float> %146, i32 0 > %148 = extractelement <4 x float> %146, i32 1 > %149 = extractelement <4 x float> %146, i32 2 > %150 = fmul float %147, 0x3FD3333340000000 > %151 = fmul float %148, 0x3FE2E147A0000000 > %152 = fadd float %151, %150 > %153 = fmul float %149, 0x3FBC28F5C0000000 > %154 = fadd float %152, %153 > %155 = bitcast float %39 to i32 > %156 = bitcast float %40 to i32 > %157 = insertelement <2 x i32> undef, i32 %155, i32 0 > %158 = insertelement <2 x i32> %157, i32 %156, i32 1 > %159 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %158, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %160 = extractelement <4 x float> %159, i32 0 > %161 = extractelement <4 x float> %159, i32 1 > %162 = extractelement <4 x float> %159, i32 2 > %163 = fmul float %160, 0x3FD3333340000000 > %164 = fmul float %161, 0x3FE2E147A0000000 > %165 = fadd float %164, %163 > %166 = fmul float %162, 0x3FBC28F5C0000000 > %167 = fadd float %165, %166 > %168 = fcmp olt float %154, %167 > %temp24.1 = select i1 %168, float %154, float %167 > %169 = bitcast float %35 to i32 > %170 = bitcast float %36 to i32 > %171 = insertelement <2 x i32> undef, i32 %169, i32 0 > %172 = insertelement <2 x i32> %171, i32 %170, i32 1 > %173 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %172, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %174 = extractelement <4 x float> %173, i32 0 > %175 = extractelement <4 x float> %173, i32 1 > %176 = extractelement <4 x float> %173, i32 2 > %177 = fmul float %174, 0x3FD3333340000000 > %178 = fmul float %175, 0x3FE2E147A0000000 > %179 = fadd float %178, %177 > %180 = fmul float %176, 0x3FBC28F5C0000000 > %181 = fadd float %179, %180 > %182 = fcmp oge float %181, %167 > %.temp24.1 = select i1 %182, float %181, float %temp24.1 > %183 = fcmp oge float %154, %167 > %temp24.2 = select i1 %183, float %154, float %167 > %184 = fcmp olt float %181, %154 > %185 = fcmp olt float %181, %167 > %.temp24.2 = select i1 %185, float %181, float %temp24.2 > %temp16.2 = select i1 %184, float %.temp24.1, float %.temp24.2 > %186 = fcmp oge float %temp16.2, %temp8.1 > %temp16.2.temp20.0.temp12.1.temp8.1 = select i1 %186, float %temp16.2, float %temp20.0.temp12.1.temp8.1 > %187 = fcmp oge float %temp20.0.temp12.1, %temp8.1 > %temp4.2 = select i1 %187, float %temp20.0.temp12.1, float %temp8.1 > %188 = fcmp olt float %temp16.2, %temp20.0.temp12.1 > %189 = fcmp olt float %temp16.2, %temp8.1 > %temp16.2.temp4.2 = select i1 %189, float %temp16.2, float %temp4.2 > %temp.0 = select i1 %188, float %temp16.2.temp20.0.temp12.1.temp8.1, float %temp16.2.temp4.2 > %190 = call float @llvm.minnum.f32(float %temp.0, float %25) > %191 = call float @llvm.minnum.f32(float %temp.0, float %25) > %192 = call float @llvm.minnum.f32(float %temp.0, float %25) > %193 = call float @llvm.minnum.f32(float %temp.0, float %25) > %194 = bitcast float %5 to i32 > %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float }> undef, i32 %194, 10 > %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %190, 11 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float %191, 12 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197, float %192, 13 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %193, 14 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..24] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} >IMM[1] UINT32 {0, 384, 336, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, IMM[0].yzzz, CONST[1][24].xyyy > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][24].xyyy > 4: MOV TEMP[2].xy, TEMP[1].xyxx > 5: FMA TEMP[1].xy, TEMP[1].xyyy, CONST[1][21].zwww, CONST[1][21].xyyy > 6: MOV TEMP[2].zw, IMM[0].yyxy > 7: MOV OUT[2], TEMP[1] > 8: MOV OUT[1], TEMP[2] > 9: MOV OUT[0], TEMP[0] > 10: END >radeonsi: Compiling shader 59 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 348) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 384) > %21 
= call float @llvm.SI.load.const(<16 x i8> %15, i32 388) > %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 > %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %13) > %25 = extractelement <4 x float> %24, i32 0 > %26 = extractelement <4 x float> %24, i32 1 > %27 = fsub float -0.000000e+00, %21 > %28 = call float @llvm.fma.f32(float %25, float %20, float %20) > %29 = call float @llvm.fma.f32(float %26, float %27, float %21) > %30 = call float @llvm.fma.f32(float %28, float %18, float %16) > %31 = call float @llvm.fma.f32(float %29, float %19, float %17) > %32 = bitcast i32 %11 to float > %33 = insertvalue <{ float, float, float }> undef, float %32, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %28, float %29, float 0.000000e+00, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %30, float %31, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %26, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %33 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], SHADOW2D_ARRAY, FLOAT >DCL CONST[1][0..62] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.5000, 1.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {1065353216, 0, 992, 704} >IMM[3] UINT32 {864, 848, 928, 976} >IMM[4] UINT32 {880, 896, 912, 736} >IMM[5] UINT32 {752, 768, 784, 816} >IMM[6] FLT32 { 0.5000, -0.5000, 0.2500, 0.0000} >IMM[7] UINT32 {720, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[0].w, TEMP[1].yyxy > 13: MOV TEMP[2].xy, TEMP[0].xyyy > 14: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 15: MOV TEMP[3].z, TEMP[2].xxxx > 16: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 17: MOV TEMP[1].xy, TEMP[0].xyyy > 18: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D > 19: FSGE TEMP[1].x, TEMP[1].wwww, IMM[0].zzzz > 20: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 21: INEG TEMP[1].x, TEMP[1].xxxx > 22: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 23: FMA TEMP[1].xyz, 
TEMP[1].xxxx, CONST[1][62].xyzz, TEMP[3].xyzz > 24: MOV TEMP[0].xyz, TEMP[1].xyzx > 25: F2I TEMP[2].x, CONST[1][44].yyyy > 26: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][54].xyzz > 27: MOV TEMP[3].w, TEMP[1].yxyz > 28: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz > 29: MOV TEMP[3].y, TEMP[1].xxxx > 30: MOV TEMP[4].x, IMM[0].xxxx > 31: MOV TEMP[4].yzw, CONST[1][53].yxyz > 32: FSGE TEMP[5], TEMP[1].xxxx, TEMP[4] > 33: AND TEMP[5], TEMP[5], IMM[1].xxxx > 34: INEG TEMP[5], TEMP[5] > 35: AND TEMP[5], TEMP[5], IMM[2].xxxx > 36: MOV TEMP[4].xy, TEMP[5] > 37: DP4 TEMP[6].x, TEMP[5], IMM[0].wwww > 38: F2I TEMP[6].x, TEMP[6].xxxx > 39: MOV TEMP[3].z, TEMP[6].xxxx > 40: ISGE TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx > 41: AND TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx > 42: INEG TEMP[2].x, TEMP[2].xxxx > 43: MOV TEMP[3].x, TEMP[2].xxxx > 44: USNE TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 45: UIF TEMP[2].xxxx :0 > 46: FSLT TEMP[1], TEMP[1].xxxx, CONST[1][53] > 47: AND TEMP[1], TEMP[1], IMM[1].xxxx > 48: INEG TEMP[1], TEMP[1] > 49: AND TEMP[1], TEMP[1], IMM[2].xxxx > 50: MUL TEMP[3], TEMP[1], TEMP[5] > 51: DP4 TEMP[4].x, TEMP[3], CONST[1][58] > 52: DP4 TEMP[1].x, TEMP[3], CONST[1][61] > 53: MOV TEMP[4].y, TEMP[1].xxxx > 54: DP4 TEMP[1].x, TEMP[3], CONST[1][55] > 55: DP4 TEMP[2].x, TEMP[3], CONST[1][56] > 56: MOV TEMP[1].y, TEMP[2].xxxx > 57: DP4 TEMP[2].x, TEMP[3], CONST[1][57] > 58: MOV TEMP[1].z, TEMP[2].xxxx > 59: MOV TEMP[0].w, IMM[0].wwww > 60: DP4 TEMP[2].x, CONST[1][46], TEMP[0] > 61: DP4 TEMP[5].x, CONST[1][47], TEMP[0] > 62: MOV TEMP[2].y, TEMP[5].xxxx > 63: DP4 TEMP[5].x, CONST[1][48], TEMP[0] > 64: MOV TEMP[2].z, TEMP[5].xxxx > 65: DP4 TEMP[5].x, CONST[1][49], TEMP[0] > 66: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz > 67: MUL TEMP[1].xyz, TEMP[4].xxyy, TEMP[1].xyzz > 68: FSEQ TEMP[2].xyz, TEMP[5].xxxx, IMM[0].xxxx > 69: SSG TEMP[4].xyz, TEMP[1].xyzz > 70: MUL TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz > 71: RCP TEMP[5].xyz, TEMP[5].xxxx > 72: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 73: UCMP TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 74: FMA TEMP[2], TEMP[1].xyxy, IMM[6].xyxy, IMM[0].zzzz > 75: DP4 TEMP[4].x, TEMP[3], CONST[1][51] > 76: MOV TEMP[3].z, TEMP[4].xxxx > 77: ADD TEMP[0].x, -TEMP[1].zzzz, IMM[0].wwww > 78: FMA TEMP[1], CONST[1][45].zwzw, IMM[6].yyxy, TEMP[2].zwzw > 79: MOV TEMP[3].xy, TEMP[1].xyxx > 80: MOV TEMP[4].xyz, TEMP[3].xyzz > 81: MOV TEMP[4].w, TEMP[0].xxxx > 82: TEX TEMP[4].x, TEMP[4], SAMP[2], SHADOW2D_ARRAY > 83: MOV TEMP[3].xy, TEMP[1].zwzz > 84: MOV TEMP[1].xyz, TEMP[3].xyzz > 85: MOV TEMP[1].w, TEMP[0].xxxx > 86: TEX TEMP[1].x, TEMP[1], SAMP[2], SHADOW2D_ARRAY > 87: FMA TEMP[2], CONST[1][45].zwzw, IMM[6].yxxx, TEMP[2] > 88: MOV TEMP[3].xy, TEMP[2].xyxx > 89: MOV TEMP[5].xyz, TEMP[3].xyzz > 90: MOV TEMP[5].w, TEMP[0].xxxx > 91: TEX TEMP[5].x, TEMP[5], SAMP[2], SHADOW2D_ARRAY > 92: MOV TEMP[3].xy, TEMP[2].zwzz > 93: MOV TEMP[2].xyz, TEMP[3].xyzz > 94: MOV TEMP[2].w, TEMP[0].xxxx > 95: TEX TEMP[2].x, TEMP[2], SAMP[2], SHADOW2D_ARRAY > 96: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx > 97: ADD TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx > 98: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[1].xxxx > 99: FMA TEMP[0].x, -TEMP[0].xxxx, IMM[6].zzzz, IMM[0].wwww >100: ELSE :0 >101: MOV TEMP[0].x, IMM[0].wwww >102: ENDIF >103: MOV TEMP[0].xyz, TEMP[0].xxxx >104: MOV TEMP[0].w, IMM[0].xxxx >105: MOV OUT[0], TEMP[0] >106: END >radeonsi: Compiling shader 60 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 708) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 728) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 732) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 848) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 852) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 856) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 864) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 868) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 872) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 992) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 996) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1000) > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 3 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 7 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 > %57 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %58 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %57, i64 0, i64 11 > %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 > %60 = extractelement <8 x i32> %56, i32 7 > %61 = extractelement <4 x i32> %59, i32 0 > %62 = and i32 %61, %60 > %63 = insertelement <4 x i32> %59, i32 %62, i32 0 > %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %69 = fcmp oeq float %66, 0.000000e+00 > %70 = fcmp oeq float 
%66, 0.000000e+00 > %71 = fcmp ogt float %64, 0.000000e+00 > %72 = select i1 %71, float 1.000000e+00, float %64 > %73 = fcmp oge float %72, 0.000000e+00 > %74 = fcmp ogt float %65, 0.000000e+00 > %75 = select i1 %74, float 1.000000e+00, float %65 > %76 = fcmp oge float %75, 0.000000e+00 > %.op = fmul float %72, 0x4600000000000000 > %77 = select i1 %73, float %.op, float 0xC600000000000000 > %.op28 = fmul float %75, 0x4600000000000000 > %78 = select i1 %76, float %.op28, float 0xC600000000000000 > %79 = fdiv float 1.000000e+00, %66 > %80 = fmul float %64, %79 > %81 = fmul float %65, %79 > %82 = select i1 %69, float %77, float %80 > %83 = select i1 %70, float %78, float %81 > %84 = fcmp oeq float %66, 0.000000e+00 > %85 = fcmp oeq float %66, 0.000000e+00 > %86 = fcmp ogt float %67, 0.000000e+00 > %87 = select i1 %86, float 1.000000e+00, float %67 > %88 = fcmp oge float %87, 0.000000e+00 > %89 = fcmp ogt float %68, 0.000000e+00 > %90 = select i1 %89, float 1.000000e+00, float %68 > %91 = fcmp oge float %90, 0.000000e+00 > %.op29 = fmul float %87, 0x4600000000000000 > %92 = select i1 %88, float %.op29, float 0xC600000000000000 > %.op30 = fmul float %90, 0x4600000000000000 > %93 = select i1 %91, float %.op30, float 0xC600000000000000 > %94 = fdiv float 1.000000e+00, %66 > %95 = fmul float %67, %94 > %96 = fmul float %68, %94 > %97 = select i1 %84, float %92, float %95 > %98 = select i1 %85, float %93, float %96 > %99 = bitcast float %82 to i32 > %100 = bitcast float %83 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = fmul float %97, %104 > %106 = fmul float %98, %104 > %107 = bitcast float %82 to i32 > %108 = bitcast float %83 to i32 > %109 = insertelement <2 x i32> undef, i32 %107, i32 0 > %110 = insertelement <2 x i32> %109, i32 %108, i32 1 > %111 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %110, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %112 = extractelement <4 x float> %111, i32 3 > %113 = fcmp oge float %112, 5.000000e-01 > %114 = select i1 %113, float 1.000000e+00, float 0.000000e+00 > %115 = call float @llvm.fma.f32(float %114, float %34, float %105) > %116 = call float @llvm.fma.f32(float %114, float %35, float %106) > %117 = call float @llvm.fma.f32(float %114, float %36, float %104) > %118 = fptosi float %25 to i32 > %119 = fmul float %115, %31 > %120 = fmul float %116, %32 > %121 = fmul float %117, %33 > %122 = fmul float %119, %119 > %123 = fmul float %120, %120 > %124 = fadd float %123, %122 > %125 = fmul float %121, %121 > %126 = fadd float %124, %125 > %127 = fcmp oge float %126, 0.000000e+00 > %128 = fcmp oge float %126, %28 > %129 = fcmp oge float %126, %29 > %130 = fcmp oge float %126, %30 > %131 = select i1 %127, float 1.000000e+00, float 0.000000e+00 > %132 = select i1 %128, float 1.000000e+00, float 0.000000e+00 > %133 = select i1 %129, float 1.000000e+00, float 0.000000e+00 > %134 = select i1 %130, float 1.000000e+00, float 0.000000e+00 > %135 = fadd float %131, %132 > %136 = fadd float %135, %133 > %137 = fadd float %136, %134 > %138 = fptosi float %137 to i32 > %139 = icmp slt i32 %118, %138 > br i1 %139, label %ENDIF, label %IF > >IF: ; preds = %main_body > %140 = call float @llvm.SI.load.const(<16 x i8> %24, i32 988) > %141 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 984) > %142 = call float @llvm.SI.load.const(<16 x i8> %24, i32 980) > %143 = call float @llvm.SI.load.const(<16 x i8> %24, i32 976) > %144 = call float @llvm.SI.load.const(<16 x i8> %24, i32 940) > %145 = call float @llvm.SI.load.const(<16 x i8> %24, i32 936) > %146 = call float @llvm.SI.load.const(<16 x i8> %24, i32 932) > %147 = call float @llvm.SI.load.const(<16 x i8> %24, i32 928) > %148 = call float @llvm.SI.load.const(<16 x i8> %24, i32 924) > %149 = call float @llvm.SI.load.const(<16 x i8> %24, i32 920) > %150 = call float @llvm.SI.load.const(<16 x i8> %24, i32 916) > %151 = call float @llvm.SI.load.const(<16 x i8> %24, i32 912) > %152 = call float @llvm.SI.load.const(<16 x i8> %24, i32 908) > %153 = call float @llvm.SI.load.const(<16 x i8> %24, i32 904) > %154 = call float @llvm.SI.load.const(<16 x i8> %24, i32 900) > %155 = call float @llvm.SI.load.const(<16 x i8> %24, i32 896) > %156 = call float @llvm.SI.load.const(<16 x i8> %24, i32 892) > %157 = call float @llvm.SI.load.const(<16 x i8> %24, i32 888) > %158 = call float @llvm.SI.load.const(<16 x i8> %24, i32 884) > %159 = call float @llvm.SI.load.const(<16 x i8> %24, i32 880) > %160 = call float @llvm.SI.load.const(<16 x i8> %24, i32 860) > %161 = call float @llvm.SI.load.const(<16 x i8> %24, i32 828) > %162 = call float @llvm.SI.load.const(<16 x i8> %24, i32 824) > %163 = call float @llvm.SI.load.const(<16 x i8> %24, i32 820) > %164 = call float @llvm.SI.load.const(<16 x i8> %24, i32 816) > %165 = call float @llvm.SI.load.const(<16 x i8> %24, i32 796) > %166 = call float @llvm.SI.load.const(<16 x i8> %24, i32 792) > %167 = call float @llvm.SI.load.const(<16 x i8> %24, i32 788) > %168 = call float @llvm.SI.load.const(<16 x i8> %24, i32 784) > %169 = call float @llvm.SI.load.const(<16 x i8> %24, i32 780) > %170 = call float @llvm.SI.load.const(<16 x i8> %24, i32 776) > %171 = call float @llvm.SI.load.const(<16 x i8> %24, i32 772) > %172 = call float @llvm.SI.load.const(<16 x i8> %24, i32 768) > %173 = call float @llvm.SI.load.const(<16 x i8> %24, i32 764) > %174 = call float @llvm.SI.load.const(<16 x i8> %24, i32 760) > %175 = call float @llvm.SI.load.const(<16 x i8> %24, i32 756) > %176 = call float @llvm.SI.load.const(<16 x i8> %24, i32 752) > %177 = call float @llvm.SI.load.const(<16 x i8> %24, i32 748) > %178 = call float @llvm.SI.load.const(<16 x i8> %24, i32 744) > %179 = call float @llvm.SI.load.const(<16 x i8> %24, i32 740) > %180 = call float @llvm.SI.load.const(<16 x i8> %24, i32 736) > %181 = fcmp olt float %126, %28 > %182 = fcmp olt float %126, %29 > %183 = fcmp olt float %126, %30 > %184 = fcmp olt float %126, %160 > %185 = select i1 %181, float 1.000000e+00, float 0.000000e+00 > %186 = select i1 %182, float 1.000000e+00, float 0.000000e+00 > %187 = select i1 %183, float 1.000000e+00, float 0.000000e+00 > %188 = select i1 %184, float 1.000000e+00, float 0.000000e+00 > %189 = fmul float %185, %131 > %190 = fmul float %186, %132 > %191 = fmul float %187, %133 > %192 = fmul float %188, %134 > %193 = fmul float %189, %147 > %194 = fmul float %190, %146 > %195 = fadd float %193, %194 > %196 = fmul float %191, %145 > %197 = fadd float %195, %196 > %198 = fmul float %192, %144 > %199 = fadd float %197, %198 > %200 = fmul float %189, %143 > %201 = fmul float %190, %142 > %202 = fadd float %200, %201 > %203 = fmul float %191, %141 > %204 = fadd float %202, %203 > %205 = fmul float %192, %140 > %206 = fadd float %204, %205 > %207 = fmul float %189, %159 > %208 = fmul float %190, 
%158 > %209 = fadd float %207, %208 > %210 = fmul float %191, %157 > %211 = fadd float %209, %210 > %212 = fmul float %192, %156 > %213 = fadd float %211, %212 > %214 = fmul float %189, %155 > %215 = fmul float %190, %154 > %216 = fadd float %214, %215 > %217 = fmul float %191, %153 > %218 = fadd float %216, %217 > %219 = fmul float %192, %152 > %220 = fadd float %218, %219 > %221 = fmul float %189, %151 > %222 = fmul float %190, %150 > %223 = fadd float %221, %222 > %224 = fmul float %191, %149 > %225 = fadd float %223, %224 > %226 = fmul float %192, %148 > %227 = fadd float %225, %226 > %228 = fmul float %180, %115 > %229 = fmul float %179, %116 > %230 = fadd float %228, %229 > %231 = fmul float %178, %117 > %232 = fadd float %230, %231 > %233 = fadd float %232, %177 > %234 = fmul float %176, %115 > %235 = fmul float %175, %116 > %236 = fadd float %234, %235 > %237 = fmul float %174, %117 > %238 = fadd float %236, %237 > %239 = fadd float %238, %173 > %240 = fmul float %172, %115 > %241 = fmul float %171, %116 > %242 = fadd float %240, %241 > %243 = fmul float %170, %117 > %244 = fadd float %242, %243 > %245 = fadd float %244, %169 > %246 = fmul float %168, %115 > %247 = fmul float %167, %116 > %248 = fadd float %246, %247 > %249 = fmul float %166, %117 > %250 = fadd float %248, %249 > %251 = fadd float %250, %165 > %252 = fadd float %213, %233 > %253 = fadd float %220, %239 > %254 = fadd float %227, %245 > %255 = fmul float %199, %252 > %256 = fmul float %199, %253 > %257 = fmul float %206, %254 > %258 = fcmp oeq float %251, 0.000000e+00 > %259 = fcmp oeq float %251, 0.000000e+00 > %260 = fcmp oeq float %251, 0.000000e+00 > %261 = fcmp ogt float %255, 0.000000e+00 > %262 = select i1 %261, float 1.000000e+00, float %255 > %263 = fcmp oge float %262, 0.000000e+00 > %264 = fcmp ogt float %256, 0.000000e+00 > %265 = select i1 %264, float 1.000000e+00, float %256 > %266 = fcmp oge float %265, 0.000000e+00 > %267 = fcmp ogt float %257, 0.000000e+00 > %268 = select i1 %267, float 1.000000e+00, float %257 > %269 = fcmp oge float %268, 0.000000e+00 > %.op31 = fmul float %262, 0x4600000000000000 > %270 = select i1 %263, float %.op31, float 0xC600000000000000 > %.op32 = fmul float %265, 0x4600000000000000 > %271 = select i1 %266, float %.op32, float 0xC600000000000000 > %.op33 = fmul float %268, 0x4600000000000000 > %272 = select i1 %269, float %.op33, float 0xC600000000000000 > %273 = fdiv float 1.000000e+00, %251 > %274 = fmul float %255, %273 > %275 = fmul float %256, %273 > %276 = fmul float %257, %273 > %277 = select i1 %258, float %270, float %274 > %278 = select i1 %259, float %271, float %275 > %279 = select i1 %260, float %272, float %276 > %280 = call float @llvm.fma.f32(float %277, float 5.000000e-01, float 5.000000e-01) > %281 = call float @llvm.fma.f32(float %278, float -5.000000e-01, float 5.000000e-01) > %282 = call float @llvm.fma.f32(float %277, float 5.000000e-01, float 5.000000e-01) > %283 = call float @llvm.fma.f32(float %278, float -5.000000e-01, float 5.000000e-01) > %284 = fmul float %189, %164 > %285 = fmul float %190, %163 > %286 = fadd float %284, %285 > %287 = fmul float %191, %162 > %288 = fadd float %286, %287 > %289 = fmul float %192, %161 > %290 = fadd float %288, %289 > %291 = fsub float 1.000000e+00, %279 > %292 = call float @llvm.fma.f32(float %26, float -5.000000e-01, float %282) > %293 = call float @llvm.fma.f32(float %27, float -5.000000e-01, float %283) > %294 = call float @llvm.fma.f32(float %26, float 5.000000e-01, float %282) > %295 = call float 
@llvm.fma.f32(float %27, float -5.000000e-01, float %283) > %296 = bitcast float %291 to i32 > %297 = bitcast float %292 to i32 > %298 = bitcast float %293 to i32 > %299 = bitcast float %290 to i32 > %300 = insertelement <4 x i32> undef, i32 %296, i32 0 > %301 = insertelement <4 x i32> %300, i32 %297, i32 1 > %302 = insertelement <4 x i32> %301, i32 %298, i32 2 > %303 = insertelement <4 x i32> %302, i32 %299, i32 3 > %304 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %303, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %305 = extractelement <4 x float> %304, i32 0 > %306 = bitcast float %291 to i32 > %307 = bitcast float %294 to i32 > %308 = bitcast float %295 to i32 > %309 = bitcast float %290 to i32 > %310 = insertelement <4 x i32> undef, i32 %306, i32 0 > %311 = insertelement <4 x i32> %310, i32 %307, i32 1 > %312 = insertelement <4 x i32> %311, i32 %308, i32 2 > %313 = insertelement <4 x i32> %312, i32 %309, i32 3 > %314 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %313, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %315 = extractelement <4 x float> %314, i32 0 > %316 = call float @llvm.fma.f32(float %26, float -5.000000e-01, float %280) > %317 = call float @llvm.fma.f32(float %27, float 5.000000e-01, float %281) > %318 = call float @llvm.fma.f32(float %26, float 5.000000e-01, float %282) > %319 = call float @llvm.fma.f32(float %27, float 5.000000e-01, float %283) > %320 = bitcast float %291 to i32 > %321 = bitcast float %316 to i32 > %322 = bitcast float %317 to i32 > %323 = bitcast float %290 to i32 > %324 = insertelement <4 x i32> undef, i32 %320, i32 0 > %325 = insertelement <4 x i32> %324, i32 %321, i32 1 > %326 = insertelement <4 x i32> %325, i32 %322, i32 2 > %327 = insertelement <4 x i32> %326, i32 %323, i32 3 > %328 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %327, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %329 = extractelement <4 x float> %328, i32 0 > %330 = bitcast float %291 to i32 > %331 = bitcast float %318 to i32 > %332 = bitcast float %319 to i32 > %333 = bitcast float %290 to i32 > %334 = insertelement <4 x i32> undef, i32 %330, i32 0 > %335 = insertelement <4 x i32> %334, i32 %331, i32 1 > %336 = insertelement <4 x i32> %335, i32 %332, i32 2 > %337 = insertelement <4 x i32> %336, i32 %333, i32 3 > %338 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %337, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %339 = extractelement <4 x float> %338, i32 0 > %340 = fadd float %315, %305 > %341 = fadd float %329, %340 > %342 = fadd float %339, %341 > %343 = fsub float -0.000000e+00, %342 > %344 = call float @llvm.fma.f32(float %343, float 2.500000e-01, float 1.000000e+00) > br label %ENDIF > >ENDIF: ; preds = %main_body, %IF > %temp.0 = phi float [ %344, %IF ], [ 1.000000e+00, %main_body ] > %345 = bitcast float %5 to i32 > %346 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %345, 10 > %347 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %346, float %temp.0, 11 > %348 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float }> %347, float %temp.0, 12 > %349 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %348, float %temp.0, 13 > %350 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %349, float 0.000000e+00, 14 > %351 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %350, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %351 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..24] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} >IMM[1] UINT32 {0, 384, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, IMM[0].yzzz, CONST[1][24].xyyy > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][24].xyyy > 4: MOV TEMP[1].xy, TEMP[1].xyxx > 5: MOV TEMP[1].zw, IMM[0].yyxy > 6: MOV OUT[1], TEMP[1] > 7: MOV OUT[0], TEMP[0] > 8: END >radeonsi: Compiling shader 61 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 384) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 388) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = fsub float -0.000000e+00, %17 > %24 = call float @llvm.fma.f32(float %21, float %16, float %16) > %25 = call float @llvm.fma.f32(float %22, float %23, float %17) > %26 = bitcast i32 %11 to float > %27 = insertvalue 
<{ float, float, float }> undef, float %26, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float 0.000000e+00, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %27 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..27] >DCL CONST[2][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.0040, 6.2000} >IMM[1] UINT32 {1, 352, 0, 432} >IMM[2] FLT32 { 0.5000, 1.7000, 0.0600, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[0].xy, TEMP[0].xyyy > 7: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 8: FMA TEMP[1].xyz, TEMP[0].xyzz, CONST[2][22].yyyy, IMM[0].zzzz > 9: DP3 TEMP[0].x, TEMP[0].xyzz, CONST[1][27].xyzz > 10: MUL TEMP[0].x, TEMP[0].xxxx, CONST[2][22].xxxx > 11: MOV TEMP[0].w, TEMP[0].xxxx > 12: MAX TEMP[2].xyz, TEMP[1].xyzz, IMM[0].xxxx > 13: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[0].wwww, IMM[2].xxxx > 14: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xyzz > 15: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[0].wwww, IMM[2].yyyy > 16: FMA TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, IMM[2].zzzz > 17: FSEQ TEMP[3].xyz, TEMP[2].xyzz, IMM[0].xxxx > 18: SSG TEMP[4].xyz, TEMP[1].xyzz > 19: MUL TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz > 20: RCP TEMP[5].x, TEMP[2].xxxx > 21: RCP TEMP[5].y, TEMP[2].yyyy > 22: RCP TEMP[5].z, TEMP[2].zzzz > 23: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 24: UCMP TEMP[0].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 25: MOV OUT[0], TEMP[0] > 26: END >radeonsi: Compiling shader 62 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %26 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 > %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 352) > %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 356) > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 3 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %44 = fcmp oeq float %43, 0.000000e+00 > %45 = fcmp oeq float %43, 0.000000e+00 > %46 = fcmp ogt float %41, 0.000000e+00 > %47 = select i1 %46, float 1.000000e+00, float %41 > %48 = fcmp oge float %47, 0.000000e+00 > %49 = fcmp ogt float %42, 0.000000e+00 > %50 = select i1 %49, float 1.000000e+00, float %42 > %51 = fcmp oge float %50, 0.000000e+00 > %.op = fmul float %47, 0x4600000000000000 > %52 = select i1 %48, float %.op, float 0xC600000000000000 > %.op24 = fmul float %50, 0x4600000000000000 > %53 = select i1 %51, float %.op24, float 0xC600000000000000 > %54 = fdiv float 1.000000e+00, %43 > %55 = fmul float %41, %54 > %56 = fmul float %42, %54 > %57 = select i1 %44, float %52, float %55 > %58 = select i1 %45, float %53, float %56 > %59 = bitcast float %57 to i32 > %60 = bitcast float %58 to i32 > %61 = insertelement <2 x i32> undef, i32 %59, i32 0 > %62 = insertelement <2 x i32> %61, i32 %60, i32 1 > %63 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %62, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %64 = extractelement <4 x float> %63, i32 0 > %65 = extractelement <4 x float> %63, i32 1 > %66 = extractelement <4 x float> %63, i32 2 > %67 = call float @llvm.fma.f32(float %64, float %31, float 0xBF70624DE0000000) > %68 = call float @llvm.fma.f32(float %65, float %31, float 0xBF70624DE0000000) > %69 = call float @llvm.fma.f32(float %66, float %31, float 0xBF70624DE0000000) > %70 = fmul float %64, %25 > %71 = fmul float %65, %26 > %72 = fadd float %71, %70 > %73 = fmul float %66, %27 > %74 = fadd float %72, %73 > %75 = fmul float %74, %30 > %76 = call float @llvm.maxnum.f32(float %67, float 0.000000e+00) > %77 = call float @llvm.maxnum.f32(float %68, float 0.000000e+00) > %78 = call float @llvm.maxnum.f32(float %69, float 0.000000e+00) > %79 = call float @llvm.fma.f32(float %76, float 0x4018CCCCC0000000, float 5.000000e-01) > %80 = call float @llvm.fma.f32(float %77, float 0x4018CCCCC0000000, float 5.000000e-01) > %81 = call float @llvm.fma.f32(float %78, float 0x4018CCCCC0000000, float 5.000000e-01) > %82 = fmul float %76, %79 > %83 = fmul float %77, %80 > %84 = fmul float %78, %81 > %85 = call float @llvm.fma.f32(float %76, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %86 = call float @llvm.fma.f32(float %77, float 0x4018CCCCC0000000, float 
0x3FFB333340000000) > %87 = call float @llvm.fma.f32(float %78, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %88 = call float @llvm.fma.f32(float %76, float %85, float 0x3FAEB851E0000000) > %89 = call float @llvm.fma.f32(float %77, float %86, float 0x3FAEB851E0000000) > %90 = call float @llvm.fma.f32(float %78, float %87, float 0x3FAEB851E0000000) > %91 = fcmp oeq float %88, 0.000000e+00 > %92 = fcmp oeq float %89, 0.000000e+00 > %93 = fcmp oeq float %90, 0.000000e+00 > %94 = fcmp ogt float %82, 0.000000e+00 > %95 = select i1 %94, float 1.000000e+00, float %82 > %96 = fcmp oge float %95, 0.000000e+00 > %97 = fcmp ogt float %83, 0.000000e+00 > %98 = select i1 %97, float 1.000000e+00, float %83 > %99 = fcmp oge float %98, 0.000000e+00 > %100 = fcmp ogt float %84, 0.000000e+00 > %101 = select i1 %100, float 1.000000e+00, float %84 > %102 = fcmp oge float %101, 0.000000e+00 > %.op25 = fmul float %95, 0x4600000000000000 > %103 = select i1 %96, float %.op25, float 0xC600000000000000 > %.op26 = fmul float %98, 0x4600000000000000 > %104 = select i1 %99, float %.op26, float 0xC600000000000000 > %.op27 = fmul float %101, 0x4600000000000000 > %105 = select i1 %102, float %.op27, float 0xC600000000000000 > %106 = fdiv float 1.000000e+00, %88 > %107 = fdiv float 1.000000e+00, %89 > %108 = fdiv float 1.000000e+00, %90 > %109 = fmul float %82, %106 > %110 = fmul float %83, %107 > %111 = fmul float %84, %108 > %112 = select i1 %91, float %103, float %109 > %113 = select i1 %92, float %104, float %110 > %114 = select i1 %93, float %105, float %111 > %115 = bitcast float %5 to i32 > %116 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %115, 10 > %117 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %116, float %112, 11 > %118 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %117, float %113, 12 > %119 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %118, float %114, 13 > %120 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %119, float %75, 14 > %121 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %120, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %121 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { 
"InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..27] >DCL CONST[2][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.4167, 1.0550} >IMM[1] UINT32 {1, 352, 0, 432} >IMM[2] FLT32 { -0.0550, 0.0031, 12.9200, 0.0000} >IMM[3] INT32 {1, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D > 8: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[2][22].yyyy > 9: DP3 TEMP[1].x, TEMP[1].xyzz, CONST[1][27].xyzz > 10: MUL TEMP[1].x, TEMP[1].xxxx, CONST[2][22].xxxx > 11: MOV TEMP[1].w, TEMP[1].xxxx > 12: ABS TEMP[3].xyz, TEMP[2].xyzz > 13: LG2 TEMP[4].x, TEMP[3].xxxx > 14: LG2 TEMP[4].y, TEMP[3].yyyy > 15: LG2 TEMP[4].z, TEMP[3].zzzz > 16: MUL TEMP[0].xyz, TEMP[4].xyzz, IMM[0].zzzz > 17: EX2 TEMP[3].x, TEMP[0].xxxx > 18: EX2 TEMP[3].y, TEMP[0].yyyy > 19: EX2 TEMP[3].z, TEMP[0].zzzz > 20: FMA TEMP[0].xyz, TEMP[3].xyzz, IMM[0].wwww, IMM[2].xxxx > 21: FSLT TEMP[3].xyz, TEMP[2].xyzz, IMM[2].yyyy > 22: AND TEMP[3].xyz, TEMP[3].xyzz, IMM[3].xxxx > 23: INEG TEMP[3].xyz, TEMP[3].xyzz > 24: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[2].zzzz > 25: USNE TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 26: UIF TEMP[4].xxxx :0 > 27: MOV TEMP[4].x, TEMP[2].xxxx > 28: ELSE :0 > 29: MOV TEMP[4].x, TEMP[0].xxxx > 30: ENDIF > 31: MOV TEMP[4].x, TEMP[4].xxxx > 32: USNE TEMP[5].x, TEMP[3].yyyy, IMM[1].zzzz > 33: UIF TEMP[5].xxxx :0 > 34: MOV TEMP[5].x, TEMP[2].yyyy > 35: ELSE :0 > 36: MOV TEMP[5].x, TEMP[0].yyyy > 37: ENDIF > 38: MOV TEMP[4].y, TEMP[5].xxxx > 39: USNE TEMP[3].x, TEMP[3].zzzz, IMM[1].zzzz > 40: UIF TEMP[3].xxxx :0 > 41: MOV TEMP[2].x, TEMP[2].zzzz > 42: ELSE :0 > 43: MOV TEMP[2].x, TEMP[0].zzzz > 44: ENDIF > 45: MOV TEMP[4].z, TEMP[2].xxxx > 46: MOV TEMP[1].xyz, TEMP[4].xyzx > 47: MOV OUT[0], TEMP[1] > 48: END >radeonsi: Compiling shader 63 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %28 = getelementptr 
[16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 > %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 352) > %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 356) > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 3 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %44 = fcmp oeq float %43, 0.000000e+00 > %45 = fcmp oeq float %43, 0.000000e+00 > %46 = fcmp ogt float %41, 0.000000e+00 > %47 = select i1 %46, float 1.000000e+00, float %41 > %48 = fcmp oge float %47, 0.000000e+00 > %49 = fcmp ogt float %42, 0.000000e+00 > %50 = select i1 %49, float 1.000000e+00, float %42 > %51 = fcmp oge float %50, 0.000000e+00 > %.op = fmul float %47, 0x4600000000000000 > %52 = select i1 %48, float %.op, float 0xC600000000000000 > %.op31 = fmul float %50, 0x4600000000000000 > %53 = select i1 %51, float %.op31, float 0xC600000000000000 > %54 = fdiv float 1.000000e+00, %43 > %55 = fmul float %41, %54 > %56 = fmul float %42, %54 > %57 = select i1 %44, float %52, float %55 > %58 = select i1 %45, float %53, float %56 > %59 = bitcast float %57 to i32 > %60 = bitcast float %58 to i32 > %61 = insertelement <2 x i32> undef, i32 %59, i32 0 > %62 = insertelement <2 x i32> %61, i32 %60, i32 1 > %63 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %62, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %64 = extractelement <4 x float> %63, i32 0 > %65 = extractelement <4 x float> %63, i32 1 > %66 = extractelement <4 x float> %63, i32 2 > %67 = fmul float %64, %31 > %68 = fmul float %65, %31 > %69 = fmul float %66, %31 > %70 = fmul float %64, %25 > %71 = fmul float %65, %26 > %72 = fadd float %71, %70 > %73 = fmul float %66, %27 > %74 = fadd float %72, %73 > %75 = fmul float %74, %30 > %76 = call float @llvm.fabs.f32(float %67) > %77 = call float @llvm.fabs.f32(float %68) > %78 = call float @llvm.fabs.f32(float %69) > %79 = call float @llvm.log2.f32(float %76) > %80 = call float @llvm.log2.f32(float %77) > %81 = call float @llvm.log2.f32(float %78) > %82 = fmul float %79, 0x3FDAAAAAA0000000 > %83 = fmul float %80, 0x3FDAAAAAA0000000 > %84 = fmul float %81, 0x3FDAAAAAA0000000 > %85 = call float @llvm.exp2.f32(float %82) > %86 = call float @llvm.exp2.f32(float %83) > %87 = call float @llvm.exp2.f32(float %84) > %88 = call float @llvm.fma.f32(float %85, float 0x3FF0E147A0000000, float 0xBFAC28F5C0000000) > %89 = call float @llvm.fma.f32(float %86, float 0x3FF0E147A0000000, float 0xBFAC28F5C0000000) > %90 = call float @llvm.fma.f32(float %87, float 0x3FF0E147A0000000, float 0xBFAC28F5C0000000) > %91 = fcmp olt float %67, 0x3F69A5C380000000 > %92 = fcmp olt float %68, 0x3F69A5C380000000 > %93 = fcmp olt float %69, 0x3F69A5C380000000 > %94 = fmul float %67, 0x4029D70A40000000 > %95 = fmul float %68, 
0x4029D70A40000000 > %96 = fmul float %69, 0x4029D70A40000000 > %. = select i1 %91, float %94, float %88 > %temp20.0 = select i1 %92, float %95, float %89 > %.30 = select i1 %93, float %96, float %90 > %97 = bitcast float %5 to i32 > %98 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %97, 10 > %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %98, float %., 11 > %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99, float %temp20.0, 12 > %101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float %.30, 13 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %75, 14 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..55] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 336, 848, 864} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 0, 0, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][21].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][21].zzzz, CONST[1][21].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][55], TEMP[0] > 8: MOV TEMP[1].z, TEMP[0].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[1].x, CONST[1][49], TEMP[1] > 16: MOV TEMP[0].w, TEMP[1].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 64 >TGSI 
shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %16 > %54 = fmul float %51, %17 > %55 = call float @llvm.fma.f32(float %52, float %18, float %18) > %56 = fmul float %35, %53 > %57 = fmul float %36, %54 > %58 = fadd float %56, %57 > %59 = fmul float %37, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %38 > %62 = fmul float %39, %53 > %63 = fmul float %40, %54 > %64 = fadd float %62, %63 > %65 = fmul float %41, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %42 > %68 = fmul float %43, %53 > %69 = fmul float %44, %54 > %70 = fadd float %68, %69 > %71 = fmul float %45, %55 > %72 = fadd float %70, %71 > %73 = fadd float 
%72, %46 > %74 = fmul float %19, %61 > %75 = fmul float %20, %67 > %76 = fadd float %74, %75 > %77 = fmul float %21, %73 > %78 = fadd float %76, %77 > %79 = fadd float %78, %22 > %80 = fmul float %23, %61 > %81 = fmul float %24, %67 > %82 = fadd float %80, %81 > %83 = fmul float %25, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %26 > %86 = fmul float %27, %61 > %87 = fmul float %28, %67 > %88 = fadd float %86, %87 > %89 = fmul float %29, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %30 > %92 = fmul float %31, %61 > %93 = fmul float %32, %67 > %94 = fadd float %92, %93 > %95 = fmul float %33, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %34 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG > 0: END >radeonsi: Compiling shader 65 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = bitcast float %5 to i32 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %23, 10 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %24, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %25 >} > >attributes #0 = { "InitialPSInputAddr"="36983" } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..57] >DCL CONST[2][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 336, 848, 864} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 912, 1, 384} > 0: MUL TEMP[0].xy, IN[0].xyyy, 
CONST[1][21].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][21].zzzz, CONST[1][21].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][55], TEMP[0] > 8: MOV TEMP[1].z, TEMP[2].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 16: MOV TEMP[0].w, TEMP[2].xxxx > 17: ADD TEMP[3].xyz, -TEMP[1].xyzz, CONST[1][57].xyzz > 18: MOV TEMP[4], TEMP[0] > 19: MOV TEMP[5].zw, TEMP[0].wwzw > 20: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 21: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 22: MUL TEMP[6].xy, IMM[1].xyyy, CONST[2][24].xyyy > 23: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[6].xyyy, TEMP[1].xyyy > 24: MOV TEMP[5].xy, TEMP[0].xyxx > 25: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[2][21].zwww, TEMP[2].xyyy > 26: MOV OUT[2], TEMP[0] > 27: MOV OUT[1], TEMP[5] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[0], TEMP[4] > 30: END >radeonsi: Compiling shader 66 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > 
%43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 912) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call float @llvm.SI.load.const(<16 x i8> %51, i32 336) > %53 = call float @llvm.SI.load.const(<16 x i8> %51, i32 340) > %54 = call float @llvm.SI.load.const(<16 x i8> %51, i32 344) > %55 = call float @llvm.SI.load.const(<16 x i8> %51, i32 348) > %56 = call float @llvm.SI.load.const(<16 x i8> %51, i32 384) > %57 = call float @llvm.SI.load.const(<16 x i8> %51, i32 388) > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %13) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = fmul float %61, %16 > %65 = fmul float %62, %17 > %66 = call float @llvm.fma.f32(float %63, float %18, float %18) > %67 = fmul float %35, %64 > %68 = fmul float %36, %65 > %69 = fadd float %67, %68 > %70 = fmul float %37, %66 > %71 = fadd float %69, %70 > %72 = fadd float %71, %38 > %73 = fmul float %39, %64 > %74 = fmul float %40, %65 > %75 = fadd float %73, %74 > %76 = fmul float %41, %66 > %77 = fadd float %75, %76 > %78 = fadd float %77, %42 > %79 = fmul float %43, %64 > %80 = fmul float %44, %65 > %81 = fadd float %79, %80 > %82 = fmul float %45, %66 > %83 = fadd float %81, %82 > %84 = fadd float %83, %46 > %85 = fmul float %19, %72 > %86 = fmul float %20, %78 > %87 = fadd float %85, %86 > %88 = fmul float %21, %84 > %89 = fadd float %87, %88 > %90 = fadd float %89, %22 > %91 = fmul float %23, %72 > %92 = fmul float %24, %78 > %93 = fadd float %91, %92 > %94 = fmul float %25, %84 > %95 = fadd float %93, %94 > %96 = fadd float %95, %26 > %97 = fmul float %27, %72 > %98 = fmul float %28, %78 > %99 = fadd float %97, %98 > %100 = fmul float %29, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %30 > %103 = fmul float %31, %72 > %104 = fmul float %32, %78 > %105 = fadd float %103, %104 > %106 = fmul float %33, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %34 > %109 = fsub float %47, %72 > %110 = fsub float %48, %78 > %111 = fsub float %49, %84 > %112 = fmul float %108, %56 > %113 = fmul float %108, %57 > %114 = fmul float %108, %52 > %115 = fmul float %108, %53 > %116 = fsub float -0.000000e+00, %57 > %117 = call float @llvm.fma.f32(float %90, float %56, float %112) > %118 = call float @llvm.fma.f32(float %96, float %116, float %113) > %119 = call float @llvm.fma.f32(float %117, float %54, float %114) > %120 = call float @llvm.fma.f32(float %118, float %55, float %115) > %121 = bitcast i32 %11 to float > %122 = insertvalue <{ float, float, float }> undef, float %121, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %117, float %118, float %102, float %108) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %119, float %120, float %102, float %108) > call void @llvm.SI.export(i32 15, 
i32 0, i32 0, i32 34, i32 0, float %109, float %110, float %111, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %96, float %102, float %108) > ret <{ float, float, float }> %122 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], SHADOW2D_ARRAY, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL CONST[1][0..40] >DCL TEMP[0..18], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.5000, 1.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {1065353216, 0, 640, 384} >IMM[3] UINT32 {400, 416, 432, 368} >IMM[4] FLT32 { 0.5000, -0.5000, 0.2500, 2.0000} >IMM[5] UINT32 {464, 256, 64, 80} >IMM[6] UINT32 {96, 288, 240, 0} >IMM[7] FLT32 { -1.0000, 4096.0000, 0.0040, 0.3000} >IMM[8] FLT32 { 0.1250, 0.5098, 0.1500, 0.3330} >IMM[9] FLT32 { 0.2500, 1.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: MOV TEMP[1].xy, TEMP[0].xyyy > 17: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 18: FSGE TEMP[4].x, TEMP[1].wwww, IMM[0].zzzz > 19: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx > 20: INEG TEMP[4].x, TEMP[4].xxxx > 21: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx > 22: FMA TEMP[4].xyz, TEMP[4].xxxx, CONST[1][40].xyzz, TEMP[3].xyzz > 23: MOV TEMP[4].w, IMM[0].wwww > 24: DP4 TEMP[5].x, CONST[1][24], TEMP[4] > 25: DP4 TEMP[6].x, CONST[1][25], TEMP[4] > 26: MOV TEMP[5].y, TEMP[6].xxxx > 27: DP4 TEMP[6].x, CONST[1][26], TEMP[4] > 28: MOV TEMP[5].z, TEMP[6].xxxx > 29: DP4 TEMP[6].x, CONST[1][27], TEMP[4] > 30: FSEQ TEMP[7].xyz, TEMP[6].xxxx, IMM[0].xxxx > 31: SSG TEMP[8].xyz, TEMP[5].xyzz > 32: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 33: RCP TEMP[6].xyz, TEMP[6].xxxx > 34: MUL TEMP[6].xyz, TEMP[5].xyzz, TEMP[6].xyzz > 35: UCMP TEMP[6].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[6].xyzz > 36: FMA TEMP[7], TEMP[6].xyxy, IMM[4].xyxy, IMM[0].zzzz > 
37: ADD TEMP[6].x, -TEMP[6].zzzz, IMM[0].wwww > 38: FMA TEMP[8], CONST[1][23].zwzw, IMM[4].xyyy, TEMP[7].zwzw > 39: MOV TEMP[4].xy, TEMP[8].xyxw > 40: MOV TEMP[8].xy, TEMP[8].zwzz > 41: MOV TEMP[8].z, CONST[1][29].xxxx > 42: MOV TEMP[9].xyz, TEMP[8].xyzz > 43: MOV TEMP[9].w, TEMP[6].xxxx > 44: TEX TEMP[9].x, TEMP[9], SAMP[2], SHADOW2D_ARRAY > 45: MOV TEMP[4].z, CONST[1][29].xxxx > 46: MOV TEMP[10].xyz, TEMP[4].xyzz > 47: MOV TEMP[10].w, TEMP[6].xxxx > 48: TEX TEMP[10].x, TEMP[10], SAMP[2], SHADOW2D_ARRAY > 49: FMA TEMP[7], CONST[1][23].zwzw, IMM[4].xxyx, TEMP[7].zwxy > 50: MOV TEMP[5].xy, TEMP[7].xyxw > 51: MOV TEMP[8].xy, TEMP[7].zwzz > 52: MOV TEMP[8].z, CONST[1][29].xxxx > 53: MOV TEMP[7].xyz, TEMP[8].xyzz > 54: MOV TEMP[7].w, TEMP[6].xxxx > 55: TEX TEMP[7].x, TEMP[7], SAMP[2], SHADOW2D_ARRAY > 56: MOV TEMP[5].z, CONST[1][29].xxxx > 57: MOV TEMP[11].xyz, TEMP[5].xyzz > 58: MOV TEMP[11].w, TEMP[6].xxxx > 59: TEX TEMP[6].x, TEMP[11], SAMP[2], SHADOW2D_ARRAY > 60: ADD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx > 61: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx > 62: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx > 63: FMA TEMP[6].x, -TEMP[6].xxxx, IMM[4].zzzz, IMM[0].wwww > 64: ADD TEMP[7].x, -CONST[1][16].wwww, IMM[0].wwww > 65: FMA TEMP[6].x, CONST[1][16].wwww, TEMP[6].xxxx, TEMP[7].xxxx > 66: MOV TEMP[3].w, IMM[0].wwww > 67: DP4 TEMP[4].x, CONST[1][4], TEMP[3] > 68: DP4 TEMP[7].x, CONST[1][5], TEMP[3] > 69: MOV TEMP[4].y, TEMP[7].xxxx > 70: DP4 TEMP[7].x, CONST[1][6], TEMP[3] > 71: MOV TEMP[3].y, TEMP[7].xxxx > 72: MOV TEMP[7].xy, TEMP[4].xyyy > 73: MOV TEMP[7].w, IMM[0].xxxx > 74: TXL TEMP[7], TEMP[7], SAMP[3], 2D > 75: MOV TEMP[3].x, IMM[0].xxxx > 76: MOV TEMP[9].xy, TEMP[3].xyyy > 77: MOV TEMP[9].w, IMM[0].xxxx > 78: TXL TEMP[9], TEMP[9], SAMP[4], 2D > 79: MUL TEMP[3], TEMP[9], TEMP[7] > 80: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[1][16].xyzz > 81: MOV TEMP[7].xy, TEMP[0].xyyy > 82: TEX TEMP[7], TEMP[7], SAMP[5], 2D > 83: MOV TEMP[5].xyz, TEMP[7] > 84: MUL TEMP[8].xyz, TEMP[4].xyzz, TEMP[7].xyzz > 85: DP3 TEMP[9].x, IN[2].xyzz, IN[2].xyzz > 86: RSQ TEMP[9].x, TEMP[9].xxxx > 87: MOV TEMP[10].xy, TEMP[0].xyyy > 88: TEX TEMP[10], TEMP[10], SAMP[6], 2D > 89: FMA TEMP[12].xyz, TEMP[10].xyzz, IMM[4].wwww, IMM[7].xxxx > 90: DP3 TEMP[13].x, TEMP[12].xyzz, TEMP[12].xyzz > 91: RSQ TEMP[13].x, TEMP[13].xxxx > 92: MUL TEMP[11].xyz, TEMP[13].xxxx, TEMP[12].xyzz > 93: MUL TEMP[3], TEMP[3], CONST[1][18] > 94: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[1].xxxx > 95: FMA TEMP[2].x, TEMP[2].xxxx, IMM[7].yyyy, IMM[7].zzzz > 96: MOV TEMP[12].x, TEMP[2].xxxx > 97: MOV TEMP[13].xy, TEMP[0].xyyy > 98: MOV TEMP[13].w, IMM[0].xxxx > 99: TXL TEMP[13].xy, TEMP[13], SAMP[7], 2D >100: DP3 TEMP[14].x, TEMP[11].xyzz, -CONST[1][15].xyzz >101: FSGE TEMP[15].x, TEMP[14].xxxx, IMM[0].xxxx >102: AND TEMP[15].x, TEMP[15].xxxx, IMM[1].xxxx >103: INEG TEMP[15].x, TEMP[15].xxxx >104: FMA TEMP[16].xyz, IN[2].xyzz, TEMP[9].xxxx, -CONST[1][15].xyzz >105: DP3 TEMP[17].x, TEMP[16].xyzz, TEMP[16].xyzz >106: RSQ TEMP[17].x, TEMP[17].xxxx >107: MUL TEMP[16].xyz, TEMP[17].xxxx, TEMP[16].xyzz >108: DP3 TEMP[17].x, TEMP[11].xyzz, TEMP[16].xyzz >109: MOV_SAT TEMP[17].x, TEMP[17].xxxx >110: MOV TEMP[12].y, TEMP[17].xxxx >111: FMA TEMP[9].xyz, IN[2].xyzz, TEMP[9].xxxx, CONST[1][15].xyzz >112: DP3 TEMP[17].x, TEMP[9].xyzz, TEMP[9].xyzz >113: RSQ TEMP[17].x, TEMP[17].xxxx >114: MUL TEMP[9].xyz, TEMP[17].xxxx, TEMP[9].xyzz >115: DP3 TEMP[17].x, TEMP[11].xyzz, TEMP[9].xyzz >116: MOV_SAT TEMP[17].x, TEMP[17].xxxx >117: MOV TEMP[11].y, TEMP[17].xxxx >118: MUL 
TEMP[11].x, TEMP[2].xxxx, IMM[7].wwww >119: MUL TEMP[2].xyz, TEMP[1].yyyy, TEMP[7].xyzz >120: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].xyzz >121: USNE TEMP[17].x, TEMP[15].xxxx, IMM[2].yyyy >122: UIF TEMP[17].xxxx :0 >123: MOV TEMP[17].x, TEMP[1].yyyy >124: ELSE :0 >125: MOV TEMP[17].x, TEMP[2].xxxx >126: ENDIF >127: MOV TEMP[17].x, TEMP[17].xxxx >128: USNE TEMP[18].x, TEMP[15].xxxx, IMM[2].yyyy >129: UIF TEMP[18].xxxx :0 >130: MOV TEMP[18].x, TEMP[1].yyyy >131: ELSE :0 >132: MOV TEMP[18].x, TEMP[2].yyyy >133: ENDIF >134: MOV TEMP[17].y, TEMP[18].xxxx >135: USNE TEMP[18].x, TEMP[15].xxxx, IMM[2].yyyy >136: UIF TEMP[18].xxxx :0 >137: MOV TEMP[18].x, TEMP[1].yyyy >138: ELSE :0 >139: MOV TEMP[18].x, TEMP[2].zzzz >140: ENDIF >141: MOV TEMP[17].z, TEMP[18].xxxx >142: USNE TEMP[2].x, TEMP[15].xxxx, IMM[2].yyyy >143: UIF TEMP[2].xxxx :0 >144: MOV TEMP[2].x, TEMP[16].xxxx >145: ELSE :0 >146: MOV TEMP[2].x, TEMP[9].xxxx >147: ENDIF >148: MOV TEMP[2].x, TEMP[2].xxxx >149: USNE TEMP[18].x, TEMP[15].xxxx, IMM[2].yyyy >150: UIF TEMP[18].xxxx :0 >151: MOV TEMP[18].x, TEMP[16].yyyy >152: ELSE :0 >153: MOV TEMP[18].x, TEMP[9].yyyy >154: ENDIF >155: MOV TEMP[2].y, TEMP[18].xxxx >156: USNE TEMP[18].x, TEMP[15].xxxx, IMM[2].yyyy >157: UIF TEMP[18].xxxx :0 >158: MOV TEMP[16].x, TEMP[16].zzzz >159: ELSE :0 >160: MOV TEMP[16].x, TEMP[9].zzzz >161: ENDIF >162: MOV TEMP[2].z, TEMP[16].xxxx >163: USNE TEMP[9].x, TEMP[15].xxxx, IMM[2].yyyy >164: UIF TEMP[9].xxxx :0 >165: MOV TEMP[9].x, TEMP[12].xxxx >166: ELSE :0 >167: MOV TEMP[9].x, TEMP[11].xxxx >168: ENDIF >169: MOV TEMP[9].x, TEMP[9].xxxx >170: USNE TEMP[16].x, TEMP[15].xxxx, IMM[2].yyyy >171: UIF TEMP[16].xxxx :0 >172: MOV TEMP[12].x, TEMP[12].yyyy >173: ELSE :0 >174: MOV TEMP[12].x, TEMP[11].yyyy >175: ENDIF >176: MOV TEMP[9].y, TEMP[12].xxxx >177: ADD TEMP[12].x, TEMP[9].xxxx, IMM[4].wwww >178: MUL TEMP[12].x, TEMP[12].xxxx, IMM[8].xxxx >179: LG2 TEMP[16].x, TEMP[9].yyyy >180: MUL TEMP[9].x, TEMP[16].xxxx, TEMP[9].xxxx >181: EX2 TEMP[9].x, TEMP[9].xxxx >182: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[9].xxxx >183: ADD TEMP[12].x, -TEMP[1].zzzz, IMM[0].wwww >184: DP3 TEMP[2].x, -CONST[1][15].xyzz, TEMP[2].xyzz >185: MOV_SAT TEMP[2].x, TEMP[2].xxxx >186: ADD TEMP[2].x, -TEMP[2].xxxx, IMM[0].wwww >187: MUL TEMP[11].x, TEMP[2].xxxx, TEMP[2].xxxx >188: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[11].xxxx >189: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[11].xxxx >190: FMA TEMP[2].x, TEMP[12].xxxx, TEMP[2].xxxx, TEMP[1].zzzz >191: MUL TEMP[2].x, TEMP[9].xxxx, TEMP[2].xxxx >192: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[13].xxxx >193: MUL TEMP[3].xyz, TEMP[17].xyzz, TEMP[3].xyzz >194: ADD TEMP[0].x, TEMP[7].wwww, TEMP[14].xxxx >195: ADD TEMP[0].x, TEMP[0].xxxx, IMM[7].xxxx >196: FSNE TEMP[9].x, TEMP[7].wwww, IMM[0].xxxx >197: UIF TEMP[9].xxxx :0 >198: RCP TEMP[9].x, TEMP[7].wwww >199: MUL TEMP[9].x, TEMP[0].xxxx, TEMP[9].xxxx >200: ELSE :0 >201: SSG TEMP[12].x, TEMP[0].xxxx >202: MUL TEMP[9].x, IMM[0].yyyy, TEMP[12].xxxx >203: ENDIF >204: MOV_SAT TEMP[9].x, TEMP[9].xxxx >205: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[8].xyzz >206: MUL TEMP[11].xyz, TEMP[9].xxxx, TEMP[2].xyzz >207: ADD TEMP[0].x, TEMP[7].wwww, -TEMP[14].xxxx >208: ADD TEMP[0].x, TEMP[0].xxxx, IMM[7].xxxx >209: FSNE TEMP[9].x, TEMP[7].wwww, IMM[0].xxxx >210: UIF TEMP[9].xxxx :0 >211: RCP TEMP[9].x, TEMP[7].wwww >212: MUL TEMP[9].x, TEMP[0].xxxx, TEMP[9].xxxx >213: ELSE :0 >214: SSG TEMP[12].x, TEMP[0].xxxx >215: MUL TEMP[9].x, IMM[0].yyyy, TEMP[12].xxxx >216: ENDIF >217: MOV_SAT TEMP[9].x, TEMP[9].xxxx >218: MUL TEMP[3].xyz, 
TEMP[9].xxxx, TEMP[2].xyzz >219: AND TEMP[2].xyz, TEMP[15].xxxx, TEMP[11].xyzz >220: MOV TEMP[11].xyz, TEMP[2].xyzx >221: USNE TEMP[9].x, TEMP[15].xxxx, IMM[2].yyyy >222: UIF TEMP[9].xxxx :0 >223: MOV TEMP[9].x, IMM[2].yyyy >224: ELSE :0 >225: MOV TEMP[9].x, TEMP[3].xxxx >226: ENDIF >227: MOV TEMP[9].x, TEMP[9].xxxx >228: USNE TEMP[12].x, TEMP[15].xxxx, IMM[2].yyyy >229: UIF TEMP[12].xxxx :0 >230: MOV TEMP[12].x, IMM[2].yyyy >231: ELSE :0 >232: MOV TEMP[12].x, TEMP[3].yyyy >233: ENDIF >234: MOV TEMP[9].y, TEMP[12].xxxx >235: USNE TEMP[12].x, TEMP[15].xxxx, IMM[2].yyyy >236: UIF TEMP[12].xxxx :0 >237: MOV TEMP[12].x, IMM[2].yyyy >238: ELSE :0 >239: MOV TEMP[12].x, TEMP[3].zzzz >240: ENDIF >241: MOV TEMP[9].z, TEMP[12].xxxx >242: FSLT TEMP[12].x, IMM[0].xxxx, TEMP[1].wwww >243: AND TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >244: INEG TEMP[12].x, TEMP[12].xxxx >245: MOV TEMP[0].x, TEMP[12].xxxx >246: USNE TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy >247: UIF TEMP[12].xxxx :0 >248: FSLT TEMP[10].x, TEMP[10].wwww, IMM[8].yyyy >249: AND TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx >250: INEG TEMP[10].x, TEMP[10].xxxx >251: MOV TEMP[0].x, TEMP[10].xxxx >252: ADD TEMP[8].xyz, TEMP[8].xyzz, TEMP[8].xyzz >253: MAX TEMP[12].x, TEMP[7].zzzz, TEMP[7].yyyy >254: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[7].xxxx >255: FSEQ TEMP[15].xyz, TEMP[12].xxxx, IMM[0].xxxx >256: SSG TEMP[16].xyz, TEMP[7].xyzz >257: MUL TEMP[16].xyz, IMM[0].yyyy, TEMP[16].xyzz >258: RCP TEMP[12].xyz, TEMP[12].xxxx >259: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz >260: UCMP TEMP[7].xyz, TEMP[15].xyzz, TEMP[16].xyzz, TEMP[7].xyzz >261: MOV_SAT TEMP[7].xyz, TEMP[7].xyzz >262: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[7].xyzz >263: MOV_SAT TEMP[7].xyz, TEMP[4].xyzz >264: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[5].xyzz >265: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[8].zzzz >266: USNE TEMP[7].x, TEMP[10].xxxx, IMM[2].yyyy >267: UIF TEMP[7].xxxx :0 >268: MOV TEMP[7].x, TEMP[8].xxxx >269: ELSE :0 >270: MOV TEMP[7].x, TEMP[4].xxxx >271: ENDIF >272: MOV TEMP[7].x, TEMP[7].xxxx >273: USNE TEMP[12].x, TEMP[10].xxxx, IMM[2].yyyy >274: UIF TEMP[12].xxxx :0 >275: MOV TEMP[12].x, TEMP[8].yyyy >276: ELSE :0 >277: MOV TEMP[12].x, TEMP[4].yyyy >278: ENDIF >279: MOV TEMP[7].y, TEMP[12].xxxx >280: USNE TEMP[10].x, TEMP[10].xxxx, IMM[2].yyyy >281: UIF TEMP[10].xxxx :0 >282: MOV TEMP[8].x, TEMP[8].zzzz >283: ELSE :0 >284: MOV TEMP[8].x, TEMP[4].zzzz >285: ENDIF >286: MOV TEMP[7].z, TEMP[8].xxxx >287: ADD TEMP[4].x, TEMP[1].wwww, IMM[4].yyyy >288: MOV_SAT TEMP[4].x, TEMP[4].xxxx >289: MUL TEMP[5].xyz, TEMP[4].xxxx, TEMP[7].xyzz >290: ADD TEMP[4].xy, -TEMP[14].xxxx, IMM[9].xyyy >291: MOV_SAT TEMP[4].xy, TEMP[4].xyyy >292: FMA TEMP[5].xyz, TEMP[5].xyzz, TEMP[4].xxxx, TEMP[2].xyzz >293: MIN TEMP[1].x, TEMP[1].wwww, IMM[0].zzzz >294: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[7].xyzz >295: MUL TEMP[1].xyz, TEMP[4].yyyy, TEMP[1].xyzz >296: ADD TEMP[2].x, TEMP[14].xxxx, IMM[4].zzzz >297: MOV_SAT TEMP[0].x, TEMP[2].xxxx >298: FMA TEMP[11].xyz, TEMP[1].xyzz, TEMP[0].xxxx, TEMP[5].xyzz >299: ENDIF >300: MUL TEMP[3].xyz, TEMP[9].xyzz, TEMP[3].wwww >301: FMA TEMP[0].xyz, TEMP[6].xxxx, TEMP[11].xyzz, TEMP[3].xyzz >302: ADD TEMP[1].x, TEMP[13].yyyy, IMM[8].wwww >303: MOV_SAT TEMP[1].x, TEMP[1].xxxx >304: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >305: MOV TEMP[0].w, IMM[0].wwww >306: MOV OUT[0], TEMP[0] >307: END >radeonsi: Compiling shader 67 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 396) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 428) > %62 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %63 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %64 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %65 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %66 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %67 = call float @llvm.SI.load.const(<16 x i8> %24, i32 640) > %68 = call float @llvm.SI.load.const(<16 x i8> %24, i32 644) > %69 = call float @llvm.SI.load.const(<16 x i8> %24, i32 648) > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, 
i64 0 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 3 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 > %81 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %82 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %81, i64 0, i64 7 > %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 > %84 = extractelement <8 x i32> %80, i32 7 > %85 = extractelement <4 x i32> %83, i32 0 > %86 = and i32 %85, %84 > %87 = insertelement <4 x i32> %83, i32 %86, i32 0 > %88 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 > %90 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %91 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %90, i64 0, i64 11 > %92 = load <4 x i32>, <4 x i32> addrspace(2)* %91, align 16, !tbaa !0 > %93 = extractelement <8 x i32> %89, i32 7 > %94 = extractelement <4 x i32> %92, i32 0 > %95 = and i32 %94, %93 > %96 = insertelement <4 x i32> %92, i32 %95, i32 0 > %97 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %98 = load <8 x i32>, <8 x i32> addrspace(2)* %97, align 32, !tbaa !0 > %99 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %100 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %99, i64 0, i64 15 > %101 = load <4 x i32>, <4 x i32> addrspace(2)* %100, align 16, !tbaa !0 > %102 = extractelement <8 x i32> %98, i32 7 > %103 = extractelement <4 x i32> %101, i32 0 > %104 = and i32 %103, %102 > %105 = insertelement <4 x i32> %101, i32 %104, i32 0 > %106 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %107 = load <8 x i32>, <8 x i32> addrspace(2)* %106, align 32, !tbaa !0 > %108 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %109 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %108, i64 0, i64 19 > %110 = load <4 x i32>, <4 x i32> addrspace(2)* %109, align 16, !tbaa !0 > %111 = extractelement <8 x i32> %107, i32 7 > %112 = extractelement <4 x i32> %110, i32 0 > %113 = and i32 %112, %111 > %114 = insertelement <4 x i32> %110, i32 %113, i32 0 > %115 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %116 = load <8 x i32>, <8 x i32> addrspace(2)* %115, align 32, !tbaa !0 > %117 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %118 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %117, i64 0, i64 23 > %119 = load <4 x i32>, <4 x i32> addrspace(2)* %118, align 16, !tbaa !0 > %120 = extractelement <8 x i32> %116, i32 7 > %121 = extractelement <4 x i32> %119, i32 0 > %122 = and i32 %121, %120 > %123 = insertelement <4 x i32> %119, i32 %122, i32 0 > %124 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %125 = load <8 x i32>, <8 x i32> addrspace(2)* %124, align 32, !tbaa !0 > %126 = bitcast 
[32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %127 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %126, i64 0, i64 27 > %128 = load <4 x i32>, <4 x i32> addrspace(2)* %127, align 16, !tbaa !0 > %129 = extractelement <8 x i32> %125, i32 7 > %130 = extractelement <4 x i32> %128, i32 0 > %131 = and i32 %130, %129 > %132 = insertelement <4 x i32> %128, i32 %131, i32 0 > %133 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %134 = load <8 x i32>, <8 x i32> addrspace(2)* %133, align 32, !tbaa !0 > %135 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %136 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %135, i64 0, i64 31 > %137 = load <4 x i32>, <4 x i32> addrspace(2)* %136, align 16, !tbaa !0 > %138 = extractelement <8 x i32> %134, i32 7 > %139 = extractelement <4 x i32> %137, i32 0 > %140 = and i32 %139, %138 > %141 = insertelement <4 x i32> %137, i32 %140, i32 0 > %142 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %143 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %144 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %145 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %146 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %147 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %148 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %149 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %150 = fcmp oeq float %144, 0.000000e+00 > %151 = fcmp oeq float %144, 0.000000e+00 > %152 = fcmp ogt float %142, 0.000000e+00 > %153 = select i1 %152, float 1.000000e+00, float %142 > %154 = fcmp oge float %153, 0.000000e+00 > %155 = fcmp ogt float %143, 0.000000e+00 > %156 = select i1 %155, float 1.000000e+00, float %143 > %157 = fcmp oge float %156, 0.000000e+00 > %.op = fmul float %153, 0x4600000000000000 > %158 = select i1 %154, float %.op, float 0xC600000000000000 > %.op131 = fmul float %156, 0x4600000000000000 > %159 = select i1 %157, float %.op131, float 0xC600000000000000 > %160 = fdiv float 1.000000e+00, %144 > %161 = fmul float %142, %160 > %162 = fmul float %143, %160 > %163 = select i1 %150, float %158, float %161 > %164 = select i1 %151, float %159, float %162 > %165 = fcmp oeq float %144, 0.000000e+00 > %166 = fcmp oeq float %144, 0.000000e+00 > %167 = fcmp ogt float %145, 0.000000e+00 > %168 = select i1 %167, float 1.000000e+00, float %145 > %169 = fcmp oge float %168, 0.000000e+00 > %170 = fcmp ogt float %146, 0.000000e+00 > %171 = select i1 %170, float 1.000000e+00, float %146 > %172 = fcmp oge float %171, 0.000000e+00 > %.op132 = fmul float %168, 0x4600000000000000 > %173 = select i1 %169, float %.op132, float 0xC600000000000000 > %.op133 = fmul float %171, 0x4600000000000000 > %174 = select i1 %172, float %.op133, float 0xC600000000000000 > %175 = fdiv float 1.000000e+00, %144 > %176 = fmul float %145, %175 > %177 = fmul float %146, %175 > %178 = select i1 %165, float %173, float %176 > %179 = select i1 %166, float %174, float %177 > %180 = bitcast float %163 to i32 > %181 = bitcast float %164 to i32 > %182 = insertelement <2 x i32> undef, i32 %180, i32 0 > %183 = insertelement <2 x i32> %182, i32 %181, i32 1 > %184 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %183, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %185 = extractelement <4 x float> %184, 
i32 0 > %186 = fmul float %178, %185 > %187 = fmul float %179, %185 > %188 = bitcast float %163 to i32 > %189 = bitcast float %164 to i32 > %190 = insertelement <2 x i32> undef, i32 %188, i32 0 > %191 = insertelement <2 x i32> %190, i32 %189, i32 1 > %192 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %191, <8 x i32> %80, <4 x i32> %87, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %193 = extractelement <4 x float> %192, i32 0 > %194 = extractelement <4 x float> %192, i32 1 > %195 = extractelement <4 x float> %192, i32 2 > %196 = extractelement <4 x float> %192, i32 3 > %197 = fcmp oge float %196, 5.000000e-01 > %198 = select i1 %197, float 1.000000e+00, float 0.000000e+00 > %199 = call float @llvm.fma.f32(float %198, float %67, float %186) > %200 = call float @llvm.fma.f32(float %198, float %68, float %187) > %201 = call float @llvm.fma.f32(float %198, float %69, float %185) > %202 = fmul float %50, %199 > %203 = fmul float %51, %200 > %204 = fadd float %202, %203 > %205 = fmul float %52, %201 > %206 = fadd float %204, %205 > %207 = fadd float %206, %53 > %208 = fmul float %54, %199 > %209 = fmul float %55, %200 > %210 = fadd float %208, %209 > %211 = fmul float %56, %201 > %212 = fadd float %210, %211 > %213 = fadd float %212, %57 > %214 = fmul float %58, %199 > %215 = fmul float %59, %200 > %216 = fadd float %214, %215 > %217 = fmul float %60, %201 > %218 = fadd float %216, %217 > %219 = fadd float %218, %61 > %220 = fmul float %62, %199 > %221 = fmul float %63, %200 > %222 = fadd float %220, %221 > %223 = fmul float %64, %201 > %224 = fadd float %222, %223 > %225 = fadd float %224, %65 > %226 = fcmp oeq float %225, 0.000000e+00 > %227 = fcmp oeq float %225, 0.000000e+00 > %228 = fcmp oeq float %225, 0.000000e+00 > %229 = fcmp ogt float %207, 0.000000e+00 > %230 = select i1 %229, float 1.000000e+00, float %207 > %231 = fcmp oge float %230, 0.000000e+00 > %232 = fcmp ogt float %213, 0.000000e+00 > %233 = select i1 %232, float 1.000000e+00, float %213 > %234 = fcmp oge float %233, 0.000000e+00 > %235 = fcmp ogt float %219, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %219 > %237 = fcmp oge float %236, 0.000000e+00 > %.op134 = fmul float %230, 0x4600000000000000 > %238 = select i1 %231, float %.op134, float 0xC600000000000000 > %.op135 = fmul float %233, 0x4600000000000000 > %239 = select i1 %234, float %.op135, float 0xC600000000000000 > %.op136 = fmul float %236, 0x4600000000000000 > %240 = select i1 %237, float %.op136, float 0xC600000000000000 > %241 = fdiv float 1.000000e+00, %225 > %242 = fmul float %207, %241 > %243 = fmul float %213, %241 > %244 = fmul float %219, %241 > %245 = select i1 %226, float %238, float %242 > %246 = select i1 %227, float %239, float %243 > %247 = select i1 %228, float %240, float %244 > %248 = call float @llvm.fma.f32(float %245, float 5.000000e-01, float 5.000000e-01) > %249 = call float @llvm.fma.f32(float %246, float -5.000000e-01, float 5.000000e-01) > %250 = call float @llvm.fma.f32(float %245, float 5.000000e-01, float 5.000000e-01) > %251 = call float @llvm.fma.f32(float %246, float -5.000000e-01, float 5.000000e-01) > %252 = fsub float 1.000000e+00, %247 > %253 = call float @llvm.fma.f32(float %48, float 5.000000e-01, float %250) > %254 = call float @llvm.fma.f32(float %49, float -5.000000e-01, float %251) > %255 = call float @llvm.fma.f32(float %48, float -5.000000e-01, float %250) > %256 = call float @llvm.fma.f32(float %49, float -5.000000e-01, float %251) > %257 = bitcast float %252 to i32 > %258 = 
bitcast float %255 to i32 > %259 = bitcast float %256 to i32 > %260 = bitcast float %66 to i32 > %261 = insertelement <4 x i32> undef, i32 %257, i32 0 > %262 = insertelement <4 x i32> %261, i32 %258, i32 1 > %263 = insertelement <4 x i32> %262, i32 %259, i32 2 > %264 = insertelement <4 x i32> %263, i32 %260, i32 3 > %265 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %264, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %266 = extractelement <4 x float> %265, i32 0 > %267 = bitcast float %252 to i32 > %268 = bitcast float %253 to i32 > %269 = bitcast float %254 to i32 > %270 = bitcast float %66 to i32 > %271 = insertelement <4 x i32> undef, i32 %267, i32 0 > %272 = insertelement <4 x i32> %271, i32 %268, i32 1 > %273 = insertelement <4 x i32> %272, i32 %269, i32 2 > %274 = insertelement <4 x i32> %273, i32 %270, i32 3 > %275 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %274, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %276 = extractelement <4 x float> %275, i32 0 > %277 = call float @llvm.fma.f32(float %48, float 5.000000e-01, float %250) > %278 = call float @llvm.fma.f32(float %49, float 5.000000e-01, float %251) > %279 = call float @llvm.fma.f32(float %48, float -5.000000e-01, float %248) > %280 = call float @llvm.fma.f32(float %49, float 5.000000e-01, float %249) > %281 = bitcast float %252 to i32 > %282 = bitcast float %279 to i32 > %283 = bitcast float %280 to i32 > %284 = bitcast float %66 to i32 > %285 = insertelement <4 x i32> undef, i32 %281, i32 0 > %286 = insertelement <4 x i32> %285, i32 %282, i32 1 > %287 = insertelement <4 x i32> %286, i32 %283, i32 2 > %288 = insertelement <4 x i32> %287, i32 %284, i32 3 > %289 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %288, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %290 = extractelement <4 x float> %289, i32 0 > %291 = bitcast float %252 to i32 > %292 = bitcast float %277 to i32 > %293 = bitcast float %278 to i32 > %294 = bitcast float %66 to i32 > %295 = insertelement <4 x i32> undef, i32 %291, i32 0 > %296 = insertelement <4 x i32> %295, i32 %292, i32 1 > %297 = insertelement <4 x i32> %296, i32 %293, i32 2 > %298 = insertelement <4 x i32> %297, i32 %294, i32 3 > %299 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %298, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %300 = extractelement <4 x float> %299, i32 0 > %301 = fadd float %266, %276 > %302 = fadd float %290, %301 > %303 = fadd float %300, %302 > %304 = fsub float -0.000000e+00, %303 > %305 = call float @llvm.fma.f32(float %304, float 2.500000e-01, float 1.000000e+00) > %306 = fsub float 1.000000e+00, %43 > %307 = call float @llvm.fma.f32(float %43, float %305, float %306) > %308 = fmul float %25, %186 > %309 = fmul float %26, %187 > %310 = fadd float %308, %309 > %311 = fmul float %27, %185 > %312 = fadd float %310, %311 > %313 = fadd float %312, %28 > %314 = fmul float %29, %186 > %315 = fmul float %30, %187 > %316 = fadd float %314, %315 > %317 = fmul float %31, %185 > %318 = fadd float %316, %317 > %319 = fadd float %318, %32 > %320 = fmul float %33, %186 > %321 = fmul float %34, %187 > %322 = fadd float %320, %321 > %323 = fmul float %35, %185 > %324 = fadd float %322, %323 > %325 = fadd float %324, %36 > %326 = bitcast float %313 to i32 > %327 = bitcast float %319 to i32 > %328 = insertelement <4 x i32> undef, i32 %326, i32 0 > %329 = insertelement 
<4 x i32> %328, i32 %327, i32 1 > %330 = insertelement <4 x i32> %329, i32 0, i32 2 > %331 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %330, <8 x i32> %98, <4 x i32> %105, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %332 = extractelement <4 x float> %331, i32 0 > %333 = extractelement <4 x float> %331, i32 1 > %334 = extractelement <4 x float> %331, i32 2 > %335 = extractelement <4 x float> %331, i32 3 > %336 = bitcast float %325 to i32 > %337 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %336, i32 1 > %338 = insertelement <4 x i32> %337, i32 0, i32 2 > %339 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %338, <8 x i32> %107, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %340 = extractelement <4 x float> %339, i32 0 > %341 = extractelement <4 x float> %339, i32 1 > %342 = extractelement <4 x float> %339, i32 2 > %343 = extractelement <4 x float> %339, i32 3 > %344 = fmul float %340, %332 > %345 = fmul float %341, %333 > %346 = fmul float %342, %334 > %347 = fmul float %343, %335 > %348 = fmul float %344, %40 > %349 = fmul float %345, %41 > %350 = fmul float %346, %42 > %351 = bitcast float %163 to i32 > %352 = bitcast float %164 to i32 > %353 = insertelement <2 x i32> undef, i32 %351, i32 0 > %354 = insertelement <2 x i32> %353, i32 %352, i32 1 > %355 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %354, <8 x i32> %116, <4 x i32> %123, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %356 = extractelement <4 x float> %355, i32 0 > %357 = extractelement <4 x float> %355, i32 1 > %358 = extractelement <4 x float> %355, i32 2 > %359 = extractelement <4 x float> %355, i32 3 > %360 = fmul float %348, %356 > %361 = fmul float %349, %357 > %362 = fmul float %350, %358 > %363 = fmul float %147, %147 > %364 = fmul float %148, %148 > %365 = fadd float %364, %363 > %366 = fmul float %149, %149 > %367 = fadd float %365, %366 > %368 = call float @llvm.AMDGPU.rsq.clamped.f32(float %367) > %369 = bitcast float %163 to i32 > %370 = bitcast float %164 to i32 > %371 = insertelement <2 x i32> undef, i32 %369, i32 0 > %372 = insertelement <2 x i32> %371, i32 %370, i32 1 > %373 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %372, <8 x i32> %125, <4 x i32> %132, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %374 = extractelement <4 x float> %373, i32 0 > %375 = extractelement <4 x float> %373, i32 1 > %376 = extractelement <4 x float> %373, i32 2 > %377 = extractelement <4 x float> %373, i32 3 > %378 = call float @llvm.fma.f32(float %374, float 2.000000e+00, float -1.000000e+00) > %379 = call float @llvm.fma.f32(float %375, float 2.000000e+00, float -1.000000e+00) > %380 = call float @llvm.fma.f32(float %376, float 2.000000e+00, float -1.000000e+00) > %381 = fmul float %378, %378 > %382 = fmul float %379, %379 > %383 = fadd float %382, %381 > %384 = fmul float %380, %380 > %385 = fadd float %383, %384 > %386 = call float @llvm.AMDGPU.rsq.clamped.f32(float %385) > %387 = fmul float %386, %378 > %388 = fmul float %386, %379 > %389 = fmul float %386, %380 > %390 = fmul float %344, %44 > %391 = fmul float %345, %45 > %392 = fmul float %346, %46 > %393 = fmul float %347, %47 > %394 = fmul float %193, %193 > %395 = call float @llvm.fma.f32(float %394, float 4.096000e+03, float 0x3F70624DE0000000) > %396 = bitcast float %163 to i32 > %397 = bitcast float %164 to i32 > %398 = insertelement <4 x i32> undef, i32 %396, i32 0 > %399 = insertelement <4 x i32> %398, i32 %397, i32 1 > %400 
= insertelement <4 x i32> %399, i32 0, i32 2 > %401 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %400, <8 x i32> %134, <4 x i32> %141, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %402 = extractelement <4 x float> %401, i32 0 > %403 = extractelement <4 x float> %401, i32 1 > %404 = fmul float %37, %387 > %405 = fsub float -0.000000e+00, %404 > %406 = fmul float %38, %388 > %407 = fsub float %405, %406 > %408 = fmul float %39, %389 > %409 = fsub float %407, %408 > %410 = fcmp oge float %409, 0.000000e+00 > %411 = fsub float -0.000000e+00, %37 > %412 = call float @llvm.fma.f32(float %147, float %368, float %411) > %413 = fsub float -0.000000e+00, %38 > %414 = call float @llvm.fma.f32(float %148, float %368, float %413) > %415 = fsub float -0.000000e+00, %39 > %416 = call float @llvm.fma.f32(float %149, float %368, float %415) > %417 = fmul float %412, %412 > %418 = fmul float %414, %414 > %419 = fadd float %418, %417 > %420 = fmul float %416, %416 > %421 = fadd float %419, %420 > %422 = call float @llvm.AMDGPU.rsq.clamped.f32(float %421) > %423 = fmul float %422, %412 > %424 = fmul float %422, %414 > %425 = fmul float %422, %416 > %426 = fmul float %387, %423 > %427 = fmul float %388, %424 > %428 = fadd float %427, %426 > %429 = fmul float %389, %425 > %430 = fadd float %428, %429 > %431 = call float @llvm.AMDGPU.clamp.(float %430, float 0.000000e+00, float 1.000000e+00) > %432 = call float @llvm.fma.f32(float %147, float %368, float %37) > %433 = call float @llvm.fma.f32(float %148, float %368, float %38) > %434 = call float @llvm.fma.f32(float %149, float %368, float %39) > %435 = fmul float %432, %432 > %436 = fmul float %433, %433 > %437 = fadd float %436, %435 > %438 = fmul float %434, %434 > %439 = fadd float %437, %438 > %440 = call float @llvm.AMDGPU.rsq.clamped.f32(float %439) > %441 = fmul float %440, %432 > %442 = fmul float %440, %433 > %443 = fmul float %440, %434 > %444 = fmul float %387, %441 > %445 = fmul float %388, %442 > %446 = fadd float %445, %444 > %447 = fmul float %389, %443 > %448 = fadd float %446, %447 > %449 = call float @llvm.AMDGPU.clamp.(float %448, float 0.000000e+00, float 1.000000e+00) > %450 = fmul float %395, 0x3FD3333340000000 > %451 = fmul float %194, %356 > %452 = fmul float %194, %357 > %453 = fmul float %194, %358 > %454 = fadd float %451, %451 > %455 = fadd float %452, %452 > %456 = fadd float %453, %453 > %. 
= select i1 %410, float %194, float %454 > %temp72.0 = select i1 %410, float %194, float %455 > %.124 = select i1 %410, float %194, float %456 > %temp8.0 = select i1 %410, float %423, float %441 > %.125 = select i1 %410, float %424, float %442 > %temp64.0 = select i1 %410, float %425, float %443 > %.126 = select i1 %410, float %395, float %450 > %temp48.0 = select i1 %410, float %431, float %449 > %457 = fadd float %.126, 2.000000e+00 > %458 = fmul float %457, 1.250000e-01 > %459 = call float @llvm.log2.f32(float %temp48.0) > %460 = fmul float %459, %.126 > %461 = call float @llvm.exp2.f32(float %460) > %462 = fmul float %458, %461 > %463 = fsub float 1.000000e+00, %195 > %464 = fmul float %37, %temp8.0 > %465 = fsub float -0.000000e+00, %464 > %466 = fmul float %38, %.125 > %467 = fsub float %465, %466 > %468 = fmul float %39, %temp64.0 > %469 = fsub float %467, %468 > %470 = call float @llvm.AMDGPU.clamp.(float %469, float 0.000000e+00, float 1.000000e+00) > %471 = fsub float 1.000000e+00, %470 > %472 = fmul float %471, %471 > %473 = fmul float %472, %472 > %474 = fmul float %471, %473 > %475 = call float @llvm.fma.f32(float %463, float %474, float %195) > %476 = fmul float %462, %475 > %477 = fmul float %390, %402 > %478 = fmul float %391, %402 > %479 = fmul float %392, %402 > %480 = fmul float %., %477 > %481 = fmul float %temp72.0, %478 > %482 = fmul float %.124, %479 > %483 = fadd float %359, %409 > %484 = fadd float %483, -1.000000e+00 > %485 = fcmp une float %359, 0.000000e+00 > br i1 %485, label %IF98, label %ELSE99 > >IF98: ; preds = %main_body > %486 = fdiv float 1.000000e+00, %359 > %487 = fmul float %484, %486 > br label %ENDIF97 > >ELSE99: ; preds = %main_body > %488 = fcmp ogt float %484, 0.000000e+00 > %489 = select i1 %488, float 1.000000e+00, float %484 > %490 = fcmp oge float %489, 0.000000e+00 > %.op137 = fmul float %489, 0x4600000000000000 > %491 = select i1 %490, float %.op137, float 0xC600000000000000 > br label %ENDIF97 > >ENDIF97: ; preds = %ELSE99, %IF98 > %temp36.1 = phi float [ %487, %IF98 ], [ %491, %ELSE99 ] > %492 = call float @llvm.AMDGPU.clamp.(float %temp36.1, float 0.000000e+00, float 1.000000e+00) > %493 = call float @llvm.fma.f32(float %480, float %476, float %360) > %494 = call float @llvm.fma.f32(float %481, float %476, float %361) > %495 = call float @llvm.fma.f32(float %482, float %476, float %362) > %496 = fmul float %492, %493 > %497 = fmul float %492, %494 > %498 = fmul float %492, %495 > %499 = fsub float %359, %409 > %500 = fadd float %499, -1.000000e+00 > %501 = fcmp une float %359, 0.000000e+00 > br i1 %501, label %IF101, label %ELSE102 > >IF101: ; preds = %ENDIF97 > %502 = fdiv float 1.000000e+00, %359 > %503 = fmul float %500, %502 > br label %ENDIF100 > >ELSE102: ; preds = %ENDIF97 > %504 = fcmp ogt float %500, 0.000000e+00 > %505 = select i1 %504, float 1.000000e+00, float %500 > %506 = fcmp oge float %505, 0.000000e+00 > %.op138 = fmul float %505, 0x4600000000000000 > %507 = select i1 %506, float %.op138, float 0xC600000000000000 > br label %ENDIF100 > >ENDIF100: ; preds = %ELSE102, %IF101 > %temp36.2 = phi float [ %503, %IF101 ], [ %507, %ELSE102 ] > %508 = call float @llvm.AMDGPU.clamp.(float %temp36.2, float 0.000000e+00, float 1.000000e+00) > %509 = fmul float %508, %493 > %510 = fmul float %508, %494 > %511 = fmul float %508, %495 > %512 = select i1 %410, float %496, float 0.000000e+00 > %513 = select i1 %410, float %497, float 0.000000e+00 > %514 = select i1 %410, float %498, float 0.000000e+00 > %.127 = select i1 %410, float 
0.000000e+00, float %509 > %temp48.1 = select i1 %410, float 0.000000e+00, float %510 > %.128 = select i1 %410, float 0.000000e+00, float %511 > %515 = fcmp ogt float %196, 0.000000e+00 > br i1 %515, label %IF113, label %ENDIF112 > >IF113: ; preds = %ENDIF100 > %516 = fcmp olt float %377, 0x3FE0505060000000 > %517 = fadd float %360, %360 > %518 = fadd float %361, %361 > %519 = fadd float %362, %362 > %520 = call float @llvm.maxnum.f32(float %358, float %357) > %521 = call float @llvm.maxnum.f32(float %520, float %356) > %522 = fcmp oeq float %521, 0.000000e+00 > %523 = fcmp oeq float %521, 0.000000e+00 > %524 = fcmp oeq float %521, 0.000000e+00 > %525 = fcmp ogt float %356, 0.000000e+00 > %526 = select i1 %525, float 1.000000e+00, float %356 > %527 = fcmp oge float %526, 0.000000e+00 > %528 = fcmp ogt float %357, 0.000000e+00 > %529 = select i1 %528, float 1.000000e+00, float %357 > %530 = fcmp oge float %529, 0.000000e+00 > %531 = fcmp ogt float %358, 0.000000e+00 > %532 = select i1 %531, float 1.000000e+00, float %358 > %533 = fcmp oge float %532, 0.000000e+00 > %.op139 = fmul float %526, 0x4600000000000000 > %534 = select i1 %527, float %.op139, float 0xC600000000000000 > %.op140 = fmul float %529, 0x4600000000000000 > %535 = select i1 %530, float %.op140, float 0xC600000000000000 > %.op141 = fmul float %532, 0x4600000000000000 > %536 = select i1 %533, float %.op141, float 0xC600000000000000 > %537 = fdiv float 1.000000e+00, %521 > %538 = fmul float %356, %537 > %539 = fmul float %357, %537 > %540 = fmul float %358, %537 > %541 = select i1 %522, float %534, float %538 > %542 = select i1 %523, float %535, float %539 > %543 = select i1 %524, float %536, float %540 > %544 = call float @llvm.AMDGPU.clamp.(float %541, float 0.000000e+00, float 1.000000e+00) > %545 = call float @llvm.AMDGPU.clamp.(float %542, float 0.000000e+00, float 1.000000e+00) > %546 = call float @llvm.AMDGPU.clamp.(float %543, float 0.000000e+00, float 1.000000e+00) > %547 = fmul float %544, %544 > %548 = fmul float %545, %545 > %549 = fmul float %546, %546 > %550 = call float @llvm.AMDGPU.clamp.(float %348, float 0.000000e+00, float 1.000000e+00) > %551 = call float @llvm.AMDGPU.clamp.(float %349, float 0.000000e+00, float 1.000000e+00) > %552 = call float @llvm.AMDGPU.clamp.(float %350, float 0.000000e+00, float 1.000000e+00) > %553 = fmul float %550, %547 > %554 = fmul float %551, %548 > %555 = fmul float %552, %549 > %556 = fmul float %553, 0x3FC3333340000000 > %557 = fmul float %554, 0x3FC3333340000000 > %558 = fmul float %555, 0x3FC3333340000000 > %.129 = select i1 %516, float %517, float %556 > %temp48.3 = select i1 %516, float %518, float %557 > %.130 = select i1 %516, float %519, float %558 > %559 = fadd float %196, -5.000000e-01 > %560 = call float @llvm.AMDGPU.clamp.(float %559, float 0.000000e+00, float 1.000000e+00) > %561 = fmul float %560, %.129 > %562 = fmul float %560, %temp48.3 > %563 = fmul float %560, %.130 > %564 = fsub float 2.500000e-01, %409 > %565 = fsub float 1.000000e+00, %409 > %566 = call float @llvm.AMDGPU.clamp.(float %564, float 0.000000e+00, float 1.000000e+00) > %567 = call float @llvm.AMDGPU.clamp.(float %565, float 0.000000e+00, float 1.000000e+00) > %568 = call float @llvm.fma.f32(float %561, float %566, float %512) > %569 = call float @llvm.fma.f32(float %562, float %566, float %513) > %570 = call float @llvm.fma.f32(float %563, float %566, float %514) > %571 = call float @llvm.minnum.f32(float %196, float 5.000000e-01) > %572 = fmul float %571, %.129 > %573 = fmul float %571, 
%temp48.3 > %574 = fmul float %571, %.130 > %575 = fmul float %567, %572 > %576 = fmul float %567, %573 > %577 = fmul float %567, %574 > %578 = fadd float %409, 2.500000e-01 > %579 = call float @llvm.AMDGPU.clamp.(float %578, float 0.000000e+00, float 1.000000e+00) > %580 = call float @llvm.fma.f32(float %575, float %579, float %568) > %581 = call float @llvm.fma.f32(float %576, float %579, float %569) > %582 = call float @llvm.fma.f32(float %577, float %579, float %570) > br label %ENDIF112 > >ENDIF112: ; preds = %ENDIF100, %IF113 > %temp44.0 = phi float [ %580, %IF113 ], [ %512, %ENDIF100 ] > %temp45.0 = phi float [ %581, %IF113 ], [ %513, %ENDIF100 ] > %temp46.0 = phi float [ %582, %IF113 ], [ %514, %ENDIF100 ] > %583 = fmul float %.127, %393 > %584 = fmul float %temp48.1, %393 > %585 = fmul float %.128, %393 > %586 = call float @llvm.fma.f32(float %307, float %temp44.0, float %583) > %587 = call float @llvm.fma.f32(float %307, float %temp45.0, float %584) > %588 = call float @llvm.fma.f32(float %307, float %temp46.0, float %585) > %589 = fadd float %403, 0x3FD54FDF40000000 > %590 = call float @llvm.AMDGPU.clamp.(float %589, float 0.000000e+00, float 1.000000e+00) > %591 = fmul float %590, %586 > %592 = fmul float %590, %587 > %593 = fmul float %590, %588 > %594 = bitcast float %5 to i32 > %595 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %594, 10 > %596 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %595, float %591, 11 > %597 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %596, float %592, 12 > %598 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %597, float %593, 13 > %599 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %598, float 1.000000e+00, 14 > %600 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %599, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %600 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) 
#2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..57] >DCL CONST[2][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 336, 848, 864} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 912, 1, 384} > 0: MOV TEMP[0].w, IMM[0].xxxx > 1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[1][21].xxxx > 2: MOV TEMP[1].w, IMM[0].xxxx > 3: DP4 TEMP[0].x, CONST[1][53], TEMP[1] > 4: DP4 TEMP[2].x, CONST[1][54], TEMP[1] > 5: MOV TEMP[0].y, TEMP[2].xxxx > 6: DP4 TEMP[2].x, CONST[1][55], TEMP[1] > 7: MOV TEMP[0].z, TEMP[2].xxxx > 8: DP4 TEMP[1].x, CONST[1][46], TEMP[0] > 9: DP4 TEMP[2].x, CONST[1][47], TEMP[0] > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[0] > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][49], TEMP[0] > 14: MOV TEMP[1].w, TEMP[2].xxxx > 15: ADD TEMP[3].xyz, -TEMP[0].xyzz, CONST[1][57].xyzz > 16: MOV TEMP[4], TEMP[1] > 17: MOV TEMP[5].zw, TEMP[1].wwzw > 18: MUL TEMP[0].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 19: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 20: MUL TEMP[6].xy, IMM[0].xyyy, CONST[2][24].xyyy > 21: FMA TEMP[0].xy, TEMP[1].xyyy, TEMP[6].xyyy, TEMP[0].xyyy > 22: MOV TEMP[5].xy, TEMP[0].xyxx > 23: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[2][21].zwww, TEMP[2].xyyy > 24: MOV OUT[2], TEMP[0] > 25: MOV OUT[1], TEMP[5] > 26: MOV OUT[3], TEMP[3] > 27: MOV OUT[0], TEMP[4] > 28: END >radeonsi: Compiling shader 68 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %27 = call 
float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 912) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call float @llvm.SI.load.const(<16 x i8> %49, i32 336) > %51 = call float @llvm.SI.load.const(<16 x i8> %49, i32 340) > %52 = call float @llvm.SI.load.const(<16 x i8> %49, i32 344) > %53 = call float @llvm.SI.load.const(<16 x i8> %49, i32 348) > %54 = call float @llvm.SI.load.const(<16 x i8> %49, i32 384) > %55 = call float @llvm.SI.load.const(<16 x i8> %49, i32 388) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %13) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = fmul float %59, %16 > %63 = fmul float %60, %16 > %64 = fmul float %61, %16 > %65 = fmul float %33, %62 > %66 = fmul float %34, %63 > %67 = fadd float %65, %66 > %68 = fmul float %35, %64 > %69 = fadd float %67, %68 > %70 = fadd float %69, %36 > %71 = fmul float %37, %62 > %72 = fmul float %38, %63 > %73 = fadd float %71, %72 > %74 = fmul float %39, %64 > %75 = fadd float %73, %74 > %76 = fadd float %75, %40 > %77 = fmul float %41, %62 > %78 = fmul float %42, %63 > %79 = fadd float %77, %78 > %80 = fmul float %43, %64 > %81 = fadd float %79, %80 > %82 = fadd float %81, %44 > %83 = fmul float %17, %70 > %84 = fmul float %18, %76 > %85 = fadd float %83, %84 > %86 = fmul float %19, %82 > %87 = fadd float %85, %86 > %88 = fadd float %87, %20 > %89 = fmul float %21, %70 > %90 = fmul float %22, %76 > %91 = fadd float %89, %90 > %92 = fmul float %23, %82 > %93 = fadd float %91, %92 > %94 = fadd float %93, %24 > %95 = fmul float %25, %70 > %96 = fmul float %26, %76 > %97 = fadd float %95, %96 > %98 = fmul float %27, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %28 > %101 = fmul float %29, %70 > %102 = fmul float %30, %76 > %103 = fadd float %101, %102 > %104 = fmul float %31, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %32 > %107 = fsub float %45, %70 > %108 = fsub float %46, %76 
> %109 = fsub float %47, %82 > %110 = fmul float %106, %54 > %111 = fmul float %106, %55 > %112 = fmul float %106, %50 > %113 = fmul float %106, %51 > %114 = fsub float -0.000000e+00, %55 > %115 = call float @llvm.fma.f32(float %88, float %54, float %110) > %116 = call float @llvm.fma.f32(float %94, float %114, float %111) > %117 = call float @llvm.fma.f32(float %115, float %52, float %112) > %118 = call float @llvm.fma.f32(float %116, float %53, float %113) > %119 = bitcast i32 %11 to float > %120 = insertvalue <{ float, float, float }> undef, float %119, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %115, float %116, float %100, float %106) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %117, float %118, float %82, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %107, float %108, float %109, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %88, float %94, float %100, float %106) > ret <{ float, float, float }> %120 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..18] >DCL TEMP[0..15], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, -1.0000} >IMM[1] UINT32 {1, 272, 0, 64} >IMM[2] UINT32 {80, 96, 256, 288} >IMM[3] FLT32 { 4096.0000, 0.0040, 2.0040, 0.1250} >IMM[4] FLT32 { 1.0000, 0.5098, -0.5000, 0.2500} >IMM[5] INT32 {1, 0, 0, 0} >IMM[6] FLT32 { 0.5000, 0.1500, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: ADD TEMP[3].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 17: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[2][0].xxxx > 18: DP3 TEMP[1].x, CONST[1][4].xyzz, TEMP[3].xyzz > 19: DP3 TEMP[2].x, CONST[1][5].xyzz, TEMP[3].xyzz > 20: MOV TEMP[1].y, TEMP[2].xxxx > 21: DP3 TEMP[2].x, CONST[1][6].xyzz, TEMP[3].xyzz > 
22: MOV TEMP[1].z, TEMP[2].xxxx > 23: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz > 24: SQRT TEMP[4].x, TEMP[2].xxxx > 25: MOV TEMP[3].y, TEMP[4].xxxx > 26: MOV TEMP[3].x, IMM[0].xxxx > 27: MOV TEMP[3].xy, TEMP[3].xyyy > 28: MOV TEMP[3].w, IMM[0].xxxx > 29: TXL TEMP[3].xyz, TEMP[3], SAMP[1], 2D > 30: MOV TEMP[4].xy, TEMP[0].xyyy > 31: TEX TEMP[4], TEMP[4], SAMP[2], 2D > 32: MOV TEMP[5].xyz, TEMP[4] > 33: MUL TEMP[6].xyz, TEMP[4].xyzz, CONST[2][16].xyzz > 34: DP3 TEMP[7].x, IN[2].xyzz, IN[2].xyzz > 35: RSQ TEMP[7].x, TEMP[7].xxxx > 36: MOV TEMP[8].xy, TEMP[0].xyyy > 37: TEX TEMP[8], TEMP[8], SAMP[3], 2D > 38: FMA TEMP[9].xyz, TEMP[8].xyzz, IMM[0].zzzz, IMM[0].wwww > 39: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz > 40: RSQ TEMP[10].x, TEMP[10].xxxx > 41: MUL TEMP[9].xyz, TEMP[10].xxxx, TEMP[9].xyzz > 42: RSQ TEMP[2].x, TEMP[2].xxxx > 43: MUL TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 44: MOV TEMP[2].xy, TEMP[0].xyyy > 45: TEX TEMP[2], TEMP[2], SAMP[4], 2D > 46: MOV TEMP[10], TEMP[2].wxyz > 47: MUL TEMP[11].x, TEMP[2].xxxx, TEMP[2].xxxx > 48: MOV TEMP[12].xy, TEMP[0].xyyy > 49: MOV TEMP[12].w, IMM[0].xxxx > 50: TXL TEMP[12].xy, TEMP[12], SAMP[5], 2D > 51: FMA TEMP[7].xyz, IN[2].xyzz, TEMP[7].xxxx, TEMP[1].xyzz > 52: DP3 TEMP[13].x, TEMP[7].xyzz, TEMP[7].xyzz > 53: RSQ TEMP[13].x, TEMP[13].xxxx > 54: MUL TEMP[7].xyz, TEMP[13].xxxx, TEMP[7].xyzz > 55: DP3 TEMP[13].x, TEMP[9].xyzz, TEMP[7].xyzz > 56: MOV_SAT TEMP[13].x, TEMP[13].xxxx > 57: FMA TEMP[11].xy, TEMP[11].xxxx, IMM[3].xxxx, IMM[3].yzzz > 58: MUL TEMP[14].x, TEMP[11].yyyy, IMM[3].wwww > 59: LG2 TEMP[13].x, TEMP[13].xxxx > 60: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx > 61: EX2 TEMP[11].x, TEMP[11].xxxx > 62: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx > 63: ADD TEMP[13].x, -TEMP[2].zzzz, IMM[4].xxxx > 64: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[7].xyzz > 65: MOV_SAT TEMP[14].x, TEMP[14].xxxx > 66: ADD TEMP[14].x, -TEMP[14].xxxx, IMM[4].xxxx > 67: MUL TEMP[15].x, TEMP[14].xxxx, TEMP[14].xxxx > 68: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[15].xxxx > 69: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx > 70: FMA TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx, TEMP[2].zzzz > 71: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx > 72: MUL TEMP[7].xyz, TEMP[12].xxxx, CONST[2][18].xyzz > 73: MUL TEMP[7].xyz, TEMP[2].yyyy, TEMP[7].xyzz > 74: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[1].xyzz > 75: MOV TEMP[0].x, TEMP[9].xxxx > 76: ADD TEMP[13].x, TEMP[4].wwww, TEMP[9].xxxx > 77: ADD TEMP[13].x, TEMP[13].xxxx, IMM[0].wwww > 78: FSNE TEMP[14].x, TEMP[4].wwww, IMM[0].xxxx > 79: UIF TEMP[14].xxxx :0 > 80: RCP TEMP[14].x, TEMP[4].wwww > 81: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[14].xxxx > 82: ELSE :0 > 83: SSG TEMP[13].x, TEMP[13].xxxx > 84: MUL TEMP[14].x, IMM[0].yyyy, TEMP[13].xxxx > 85: ENDIF > 86: MOV_SAT TEMP[13].x, TEMP[14].xxxx > 87: FMA TEMP[11].xyz, TEMP[7].xyzz, TEMP[11].xxxx, TEMP[6].xyzz > 88: MUL TEMP[1].xyz, TEMP[13].xxxx, TEMP[11].xyzz > 89: FSLT TEMP[11].x, IMM[0].xxxx, TEMP[2].wwww > 90: AND TEMP[11].x, TEMP[11].xxxx, IMM[5].xxxx > 91: INEG TEMP[11].x, TEMP[11].xxxx > 92: USNE TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz > 93: UIF TEMP[11].xxxx :0 > 94: FSLT TEMP[8].x, TEMP[8].wwww, IMM[4].yyyy > 95: AND TEMP[8].x, TEMP[8].xxxx, IMM[5].xxxx > 96: INEG TEMP[8].x, TEMP[8].xxxx > 97: ADD TEMP[11].xyz, TEMP[6].xyzz, TEMP[6].xyzz > 98: MOV TEMP[10].yzw, TEMP[11].yxyz > 99: ADD TEMP[13].x, TEMP[2].wwww, IMM[4].zzzz >100: MOV_SAT TEMP[13].x, TEMP[13].xxxx >101: MUL TEMP[6].xyz, TEMP[13].xxxx, TEMP[11].xyzz >102: ADD TEMP[11].xy, -TEMP[9].xxxx, IMM[4].wxxx >103: MOV_SAT 
TEMP[11].xy, TEMP[11].xyyy >104: FMA TEMP[6].xyz, TEMP[6].xyzz, TEMP[11].xxxx, TEMP[1].xyzz >105: MIN TEMP[7].x, TEMP[2].wwww, IMM[6].xxxx >106: MAX TEMP[2].x, TEMP[4].zzzz, TEMP[4].yyyy >107: MAX TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx >108: FSEQ TEMP[13].xyz, TEMP[2].xxxx, IMM[0].xxxx >109: SSG TEMP[14].xyz, TEMP[4].xyzz >110: MUL TEMP[14].xyz, IMM[0].yyyy, TEMP[14].xyzz >111: RCP TEMP[2].xyz, TEMP[2].xxxx >112: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xyzz >113: UCMP TEMP[2].xyz, TEMP[13].xyzz, TEMP[14].xyzz, TEMP[2].xyzz >114: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz >115: MOV_SAT TEMP[4].xyz, CONST[2][16].xyzz >116: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[2].xyzz >117: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[4].xyzz >118: MUL TEMP[2].xyz, TEMP[5].xyzz, IMM[6].yyyy >119: MOV TEMP[7].yzw, TEMP[2].yxyz >120: USNE TEMP[2].x, TEMP[8].xxxx, IMM[1].zzzz >121: UIF TEMP[2].xxxx :0 >122: MOV TEMP[2].x, TEMP[6].xxxx >123: ELSE :0 >124: MOV TEMP[2].x, TEMP[1].xxxx >125: ENDIF >126: MOV TEMP[2].x, TEMP[2].xxxx >127: USNE TEMP[4].x, TEMP[8].xxxx, IMM[1].zzzz >128: UIF TEMP[4].xxxx :0 >129: MOV TEMP[4].x, TEMP[6].yyyy >130: ELSE :0 >131: MOV TEMP[4].x, TEMP[1].yyyy >132: ENDIF >133: MOV TEMP[2].y, TEMP[4].xxxx >134: USNE TEMP[4].x, TEMP[8].xxxx, IMM[1].zzzz >135: UIF TEMP[4].xxxx :0 >136: MOV TEMP[4].x, TEMP[6].zzzz >137: ELSE :0 >138: MOV TEMP[4].x, TEMP[1].zzzz >139: ENDIF >140: MOV TEMP[2].z, TEMP[4].xxxx >141: USNE TEMP[4].x, TEMP[8].xxxx, IMM[1].zzzz >142: UIF TEMP[4].xxxx :0 >143: MOV TEMP[4].x, TEMP[10].xxxx >144: ELSE :0 >145: MOV TEMP[4].x, TEMP[7].xxxx >146: ENDIF >147: MOV TEMP[4].x, TEMP[4].xxxx >148: USNE TEMP[5].x, TEMP[8].xxxx, IMM[1].zzzz >149: UIF TEMP[5].xxxx :0 >150: MOV TEMP[5].x, TEMP[10].yyyy >151: ELSE :0 >152: MOV TEMP[5].x, TEMP[7].yyyy >153: ENDIF >154: MOV TEMP[4].y, TEMP[5].xxxx >155: USNE TEMP[5].x, TEMP[8].xxxx, IMM[1].zzzz >156: UIF TEMP[5].xxxx :0 >157: MOV TEMP[5].x, TEMP[10].zzzz >158: ELSE :0 >159: MOV TEMP[5].x, TEMP[7].zzzz >160: ENDIF >161: MOV TEMP[4].z, TEMP[5].xxxx >162: USNE TEMP[5].x, TEMP[8].xxxx, IMM[1].zzzz >163: UIF TEMP[5].xxxx :0 >164: MOV TEMP[5].x, TEMP[10].wwww >165: ELSE :0 >166: MOV TEMP[5].x, TEMP[7].wwww >167: ENDIF >168: MOV TEMP[4].w, TEMP[5].xxxx >169: MIN TEMP[5].x, TEMP[4].xxxx, IMM[6].xxxx >170: MUL TEMP[6].xyz, TEMP[5].xxxx, TEMP[4].yzww >171: MUL TEMP[6].xyz, TEMP[11].yyyy, TEMP[6].xyzz >172: ADD TEMP[4].x, TEMP[9].xxxx, IMM[4].wwww >173: MOV_SAT TEMP[0].x, TEMP[4].xxxx >174: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[2].xyzz >175: ENDIF >176: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz >177: MUL TEMP[0].xyz, TEMP[12].yyyy, TEMP[0].xyzz >178: MOV TEMP[0].w, IMM[4].xxxx >179: MOV OUT[0], TEMP[0] >180: END >radeonsi: Compiling shader 69 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 3 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 > %57 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %58 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %57, i64 0, i64 7 > %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 > %60 = extractelement <8 x i32> %56, i32 7 > %61 = extractelement <4 x i32> %59, i32 0 > %62 = and i32 %61, %60 > %63 = insertelement <4 x i32> %59, i32 %62, i32 0 > %64 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 > %66 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %67 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %66, i64 0, i64 11 > %68 = load <4 x i32>, <4 x i32> addrspace(2)* %67, align 16, !tbaa !0 > %69 = extractelement <8 x i32> %65, i32 7 > %70 = extractelement <4 x i32> %68, i32 0 > %71 = and i32 %70, %69 > %72 = insertelement <4 x i32> %68, i32 %71, i32 0 > %73 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, align 32, !tbaa !0 > %75 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %76 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %75, i64 0, i64 15 > %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 > %78 = extractelement <8 x i32> %74, i32 7 > %79 = extractelement <4 x i32> %77, i32 0 > %80 = and i32 %79, %78 > %81 = 
insertelement <4 x i32> %77, i32 %80, i32 0 > %82 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0 > %84 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %85 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %84, i64 0, i64 19 > %86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0 > %87 = extractelement <8 x i32> %83, i32 7 > %88 = extractelement <4 x i32> %86, i32 0 > %89 = and i32 %88, %87 > %90 = insertelement <4 x i32> %86, i32 %89, i32 0 > %91 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0 > %93 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %94 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %93, i64 0, i64 23 > %95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0 > %96 = extractelement <8 x i32> %92, i32 7 > %97 = extractelement <4 x i32> %95, i32 0 > %98 = and i32 %97, %96 > %99 = insertelement <4 x i32> %95, i32 %98, i32 0 > %100 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %102 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %103 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %104 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %105 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %106 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %107 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %108 = fcmp oeq float %102, 0.000000e+00 > %109 = fcmp oeq float %102, 0.000000e+00 > %110 = fcmp ogt float %100, 0.000000e+00 > %111 = select i1 %110, float 1.000000e+00, float %100 > %112 = fcmp oge float %111, 0.000000e+00 > %113 = fcmp ogt float %101, 0.000000e+00 > %114 = select i1 %113, float 1.000000e+00, float %101 > %115 = fcmp oge float %114, 0.000000e+00 > %.op = fmul float %111, 0x4600000000000000 > %116 = select i1 %112, float %.op, float 0xC600000000000000 > %.op91 = fmul float %114, 0x4600000000000000 > %117 = select i1 %115, float %.op91, float 0xC600000000000000 > %118 = fdiv float 1.000000e+00, %102 > %119 = fmul float %100, %118 > %120 = fmul float %101, %118 > %121 = select i1 %108, float %116, float %119 > %122 = select i1 %109, float %117, float %120 > %123 = fcmp oeq float %102, 0.000000e+00 > %124 = fcmp oeq float %102, 0.000000e+00 > %125 = fcmp ogt float %103, 0.000000e+00 > %126 = select i1 %125, float 1.000000e+00, float %103 > %127 = fcmp oge float %126, 0.000000e+00 > %128 = fcmp ogt float %104, 0.000000e+00 > %129 = select i1 %128, float 1.000000e+00, float %104 > %130 = fcmp oge float %129, 0.000000e+00 > %.op92 = fmul float %126, 0x4600000000000000 > %131 = select i1 %127, float %.op92, float 0xC600000000000000 > %.op93 = fmul float %129, 0x4600000000000000 > %132 = select i1 %130, float %.op93, float 0xC600000000000000 > %133 = fdiv float 1.000000e+00, %102 > %134 = fmul float %103, %133 > %135 = fmul float %104, %133 > %136 = select i1 %123, float %131, float %134 > %137 = select i1 %124, float %132, float %135 > %138 = bitcast float %121 to i32 > %139 = bitcast float %122 to i32 > %140 = insertelement <2 x i32> undef, i32 %138, i32 0 > %141 = insertelement <2 x i32> %140, i32 %139, i32 1 > %142 = call <4 x 
float> @llvm.SI.image.sample.v2i32(<2 x i32> %141, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %143 = extractelement <4 x float> %142, i32 0 > %144 = fmul float %136, %143 > %145 = fmul float %137, %143 > %146 = fsub float %40, %144 > %147 = fsub float %41, %145 > %148 = fsub float %42, %143 > %149 = fmul float %146, %36 > %150 = fmul float %147, %36 > %151 = fmul float %148, %36 > %152 = fmul float %25, %149 > %153 = fmul float %26, %150 > %154 = fadd float %153, %152 > %155 = fmul float %27, %151 > %156 = fadd float %154, %155 > %157 = fmul float %28, %149 > %158 = fmul float %29, %150 > %159 = fadd float %158, %157 > %160 = fmul float %30, %151 > %161 = fadd float %159, %160 > %162 = fmul float %31, %149 > %163 = fmul float %32, %150 > %164 = fadd float %163, %162 > %165 = fmul float %33, %151 > %166 = fadd float %164, %165 > %167 = fmul float %156, %156 > %168 = fmul float %161, %161 > %169 = fadd float %168, %167 > %170 = fmul float %166, %166 > %171 = fadd float %169, %170 > %172 = call float @llvm.sqrt.f32(float %171) > %173 = bitcast float %172 to i32 > %174 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %173, i32 1 > %175 = insertelement <4 x i32> %174, i32 0, i32 2 > %176 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %175, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %177 = extractelement <4 x float> %176, i32 0 > %178 = extractelement <4 x float> %176, i32 1 > %179 = extractelement <4 x float> %176, i32 2 > %180 = bitcast float %121 to i32 > %181 = bitcast float %122 to i32 > %182 = insertelement <2 x i32> undef, i32 %180, i32 0 > %183 = insertelement <2 x i32> %182, i32 %181, i32 1 > %184 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %183, <8 x i32> %65, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %185 = extractelement <4 x float> %184, i32 0 > %186 = extractelement <4 x float> %184, i32 1 > %187 = extractelement <4 x float> %184, i32 2 > %188 = extractelement <4 x float> %184, i32 3 > %189 = fmul float %185, %37 > %190 = fmul float %186, %38 > %191 = fmul float %187, %39 > %192 = fmul float %105, %105 > %193 = fmul float %106, %106 > %194 = fadd float %193, %192 > %195 = fmul float %107, %107 > %196 = fadd float %194, %195 > %197 = call float @llvm.AMDGPU.rsq.clamped.f32(float %196) > %198 = bitcast float %121 to i32 > %199 = bitcast float %122 to i32 > %200 = insertelement <2 x i32> undef, i32 %198, i32 0 > %201 = insertelement <2 x i32> %200, i32 %199, i32 1 > %202 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %201, <8 x i32> %74, <4 x i32> %81, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %203 = extractelement <4 x float> %202, i32 0 > %204 = extractelement <4 x float> %202, i32 1 > %205 = extractelement <4 x float> %202, i32 2 > %206 = extractelement <4 x float> %202, i32 3 > %207 = call float @llvm.fma.f32(float %203, float 2.000000e+00, float -1.000000e+00) > %208 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float -1.000000e+00) > %209 = call float @llvm.fma.f32(float %205, float 2.000000e+00, float -1.000000e+00) > %210 = fmul float %207, %207 > %211 = fmul float %208, %208 > %212 = fadd float %211, %210 > %213 = fmul float %209, %209 > %214 = fadd float %212, %213 > %215 = call float @llvm.AMDGPU.rsq.clamped.f32(float %214) > %216 = fmul float %215, %207 > %217 = fmul float %215, %208 > %218 = fmul float %215, %209 > %219 = call float 
@llvm.AMDGPU.rsq.clamped.f32(float %171) > %220 = fmul float %219, %156 > %221 = fmul float %219, %161 > %222 = fmul float %219, %166 > %223 = bitcast float %121 to i32 > %224 = bitcast float %122 to i32 > %225 = insertelement <2 x i32> undef, i32 %223, i32 0 > %226 = insertelement <2 x i32> %225, i32 %224, i32 1 > %227 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %226, <8 x i32> %83, <4 x i32> %90, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %228 = extractelement <4 x float> %227, i32 0 > %229 = extractelement <4 x float> %227, i32 1 > %230 = extractelement <4 x float> %227, i32 2 > %231 = extractelement <4 x float> %227, i32 3 > %232 = fmul float %228, %228 > %233 = bitcast float %121 to i32 > %234 = bitcast float %122 to i32 > %235 = insertelement <4 x i32> undef, i32 %233, i32 0 > %236 = insertelement <4 x i32> %235, i32 %234, i32 1 > %237 = insertelement <4 x i32> %236, i32 0, i32 2 > %238 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %237, <8 x i32> %92, <4 x i32> %99, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %239 = extractelement <4 x float> %238, i32 0 > %240 = extractelement <4 x float> %238, i32 1 > %241 = call float @llvm.fma.f32(float %105, float %197, float %220) > %242 = call float @llvm.fma.f32(float %106, float %197, float %221) > %243 = call float @llvm.fma.f32(float %107, float %197, float %222) > %244 = fmul float %241, %241 > %245 = fmul float %242, %242 > %246 = fadd float %245, %244 > %247 = fmul float %243, %243 > %248 = fadd float %246, %247 > %249 = call float @llvm.AMDGPU.rsq.clamped.f32(float %248) > %250 = fmul float %249, %241 > %251 = fmul float %249, %242 > %252 = fmul float %249, %243 > %253 = fmul float %216, %250 > %254 = fmul float %217, %251 > %255 = fadd float %254, %253 > %256 = fmul float %218, %252 > %257 = fadd float %255, %256 > %258 = call float @llvm.AMDGPU.clamp.(float %257, float 0.000000e+00, float 1.000000e+00) > %259 = call float @llvm.fma.f32(float %232, float 4.096000e+03, float 0x3F70624DE0000000) > %260 = call float @llvm.fma.f32(float %232, float 4.096000e+03, float 0x4000083120000000) > %261 = fmul float %260, 1.250000e-01 > %262 = call float @llvm.log2.f32(float %258) > %263 = fmul float %262, %259 > %264 = call float @llvm.exp2.f32(float %263) > %265 = fmul float %264, %261 > %266 = fsub float 1.000000e+00, %230 > %267 = fmul float %220, %250 > %268 = fmul float %221, %251 > %269 = fadd float %268, %267 > %270 = fmul float %222, %252 > %271 = fadd float %269, %270 > %272 = call float @llvm.AMDGPU.clamp.(float %271, float 0.000000e+00, float 1.000000e+00) > %273 = fsub float 1.000000e+00, %272 > %274 = fmul float %273, %273 > %275 = fmul float %274, %274 > %276 = fmul float %273, %275 > %277 = call float @llvm.fma.f32(float %266, float %276, float %230) > %278 = fmul float %277, %265 > %279 = fmul float %239, %43 > %280 = fmul float %239, %44 > %281 = fmul float %239, %45 > %282 = fmul float %229, %279 > %283 = fmul float %229, %280 > %284 = fmul float %229, %281 > %285 = fmul float %216, %220 > %286 = fmul float %217, %221 > %287 = fadd float %286, %285 > %288 = fmul float %218, %222 > %289 = fadd float %287, %288 > %290 = fadd float %188, %289 > %291 = fadd float %290, -1.000000e+00 > %292 = fcmp une float %188, 0.000000e+00 > br i1 %292, label %IF, label %ELSE > >IF: ; preds = %main_body > %293 = fdiv float 1.000000e+00, %188 > %294 = fmul float %291, %293 > br label %ENDIF > >ELSE: ; preds = %main_body > %295 = fcmp ogt float %291, 0.000000e+00 > %296 = select i1 %295, 
float 1.000000e+00, float %291 > %297 = fcmp oge float %296, 0.000000e+00 > %.op94 = fmul float %296, 0x4600000000000000 > %298 = select i1 %297, float %.op94, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp56.0 = phi float [ %294, %IF ], [ %298, %ELSE ] > %299 = call float @llvm.AMDGPU.clamp.(float %temp56.0, float 0.000000e+00, float 1.000000e+00) > %300 = call float @llvm.fma.f32(float %282, float %278, float %189) > %301 = call float @llvm.fma.f32(float %283, float %278, float %190) > %302 = call float @llvm.fma.f32(float %284, float %278, float %191) > %303 = fmul float %299, %300 > %304 = fmul float %299, %301 > %305 = fmul float %299, %302 > %306 = fcmp ogt float %231, 0.000000e+00 > br i1 %306, label %IF65, label %ENDIF64 > >IF65: ; preds = %ENDIF > %307 = fcmp olt float %206, 0x3FE0505060000000 > %308 = fadd float %189, %189 > %309 = fadd float %190, %190 > %310 = fadd float %191, %191 > %311 = fadd float %231, -5.000000e-01 > %312 = call float @llvm.AMDGPU.clamp.(float %311, float 0.000000e+00, float 1.000000e+00) > %313 = fmul float %312, %308 > %314 = fmul float %312, %309 > %315 = fmul float %312, %310 > %316 = fsub float 2.500000e-01, %289 > %317 = fsub float 1.000000e+00, %289 > %318 = call float @llvm.AMDGPU.clamp.(float %316, float 0.000000e+00, float 1.000000e+00) > %319 = call float @llvm.AMDGPU.clamp.(float %317, float 0.000000e+00, float 1.000000e+00) > %320 = call float @llvm.fma.f32(float %313, float %318, float %303) > %321 = call float @llvm.fma.f32(float %314, float %318, float %304) > %322 = call float @llvm.fma.f32(float %315, float %318, float %305) > %323 = call float @llvm.minnum.f32(float %231, float 5.000000e-01) > %324 = call float @llvm.maxnum.f32(float %187, float %186) > %325 = call float @llvm.maxnum.f32(float %324, float %185) > %326 = fcmp oeq float %325, 0.000000e+00 > %327 = fcmp oeq float %325, 0.000000e+00 > %328 = fcmp oeq float %325, 0.000000e+00 > %329 = fcmp ogt float %185, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %185 > %331 = fcmp oge float %330, 0.000000e+00 > %332 = fcmp ogt float %186, 0.000000e+00 > %333 = select i1 %332, float 1.000000e+00, float %186 > %334 = fcmp oge float %333, 0.000000e+00 > %335 = fcmp ogt float %187, 0.000000e+00 > %336 = select i1 %335, float 1.000000e+00, float %187 > %337 = fcmp oge float %336, 0.000000e+00 > %.op95 = fmul float %330, 0x4600000000000000 > %338 = select i1 %331, float %.op95, float 0xC600000000000000 > %.op96 = fmul float %333, 0x4600000000000000 > %339 = select i1 %334, float %.op96, float 0xC600000000000000 > %.op97 = fmul float %336, 0x4600000000000000 > %340 = select i1 %337, float %.op97, float 0xC600000000000000 > %341 = fdiv float 1.000000e+00, %325 > %342 = fmul float %185, %341 > %343 = fmul float %186, %341 > %344 = fmul float %187, %341 > %345 = select i1 %326, float %338, float %342 > %346 = select i1 %327, float %339, float %343 > %347 = select i1 %328, float %340, float %344 > %348 = call float @llvm.AMDGPU.clamp.(float %345, float 0.000000e+00, float 1.000000e+00) > %349 = call float @llvm.AMDGPU.clamp.(float %346, float 0.000000e+00, float 1.000000e+00) > %350 = call float @llvm.AMDGPU.clamp.(float %347, float 0.000000e+00, float 1.000000e+00) > %351 = call float @llvm.AMDGPU.clamp.(float %37, float 0.000000e+00, float 1.000000e+00) > %352 = call float @llvm.AMDGPU.clamp.(float %38, float 0.000000e+00, float 1.000000e+00) > %353 = call float @llvm.AMDGPU.clamp.(float %39, float 0.000000e+00, float 1.000000e+00) > %354 = 
fmul float %348, %348 > %355 = fmul float %349, %349 > %356 = fmul float %350, %350 > %357 = fmul float %354, %351 > %358 = fmul float %355, %352 > %359 = fmul float %356, %353 > %360 = fmul float %357, 0x3FC3333340000000 > %361 = fmul float %358, 0x3FC3333340000000 > %362 = fmul float %359, 0x3FC3333340000000 > %. = select i1 %307, float %320, float %303 > %temp16.0 = select i1 %307, float %321, float %304 > %.88 = select i1 %307, float %322, float %305 > %temp16.2 = select i1 %307, float %231, float %323 > %.89 = select i1 %307, float %308, float %360 > %temp20.1 = select i1 %307, float %309, float %361 > %.90 = select i1 %307, float %310, float %362 > %363 = call float @llvm.minnum.f32(float %temp16.2, float 5.000000e-01) > %364 = fmul float %363, %.89 > %365 = fmul float %363, %temp20.1 > %366 = fmul float %363, %.90 > %367 = fmul float %319, %364 > %368 = fmul float %319, %365 > %369 = fmul float %319, %366 > %370 = fadd float %289, 2.500000e-01 > %371 = call float @llvm.AMDGPU.clamp.(float %370, float 0.000000e+00, float 1.000000e+00) > %372 = call float @llvm.fma.f32(float %367, float %371, float %.) > %373 = call float @llvm.fma.f32(float %368, float %371, float %temp16.0) > %374 = call float @llvm.fma.f32(float %369, float %371, float %.88) > br label %ENDIF64 > >ENDIF64: ; preds = %ENDIF, %IF65 > %temp6.0 = phi float [ %374, %IF65 ], [ %305, %ENDIF ] > %temp5.0 = phi float [ %373, %IF65 ], [ %304, %ENDIF ] > %temp4.0 = phi float [ %372, %IF65 ], [ %303, %ENDIF ] > %375 = fmul float %177, %temp4.0 > %376 = fmul float %178, %temp5.0 > %377 = fmul float %179, %temp6.0 > %378 = fmul float %240, %375 > %379 = fmul float %240, %376 > %380 = fmul float %240, %377 > %381 = bitcast float %5 to i32 > %382 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %381, 10 > %383 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %382, float %378, 11 > %384 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %383, float %379, 12 > %385 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %384, float %380, 13 > %386 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %385, float 1.000000e+00, 14 > %387 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %386, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %387 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; 
Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..55] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 336, 848, 864} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[1][21].xxxx > 1: MOV TEMP[0].w, IMM[1].xxxx > 2: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][55], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[1].w, IMM[1].xxxx > 8: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 9: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 10: MOV TEMP[0].y, TEMP[2].xxxx > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 12: MOV TEMP[0].z, TEMP[2].xxxx > 13: DP4 TEMP[1].x, CONST[1][49], TEMP[1] > 14: MOV TEMP[0].w, TEMP[1].xxxx > 15: MOV OUT[0], TEMP[0] > 16: END >radeonsi: Compiling shader 70 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %31 = call float @llvm.SI.load.const(<16 x i8> 
%15, i32 792) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %13) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = fmul float %48, %16 > %52 = fmul float %49, %16 > %53 = fmul float %50, %16 > %54 = fmul float %33, %51 > %55 = fmul float %34, %52 > %56 = fadd float %54, %55 > %57 = fmul float %35, %53 > %58 = fadd float %56, %57 > %59 = fadd float %58, %36 > %60 = fmul float %37, %51 > %61 = fmul float %38, %52 > %62 = fadd float %60, %61 > %63 = fmul float %39, %53 > %64 = fadd float %62, %63 > %65 = fadd float %64, %40 > %66 = fmul float %41, %51 > %67 = fmul float %42, %52 > %68 = fadd float %66, %67 > %69 = fmul float %43, %53 > %70 = fadd float %68, %69 > %71 = fadd float %70, %44 > %72 = fmul float %17, %59 > %73 = fmul float %18, %65 > %74 = fadd float %72, %73 > %75 = fmul float %19, %71 > %76 = fadd float %74, %75 > %77 = fadd float %76, %20 > %78 = fmul float %21, %59 > %79 = fmul float %22, %65 > %80 = fadd float %78, %79 > %81 = fmul float %23, %71 > %82 = fadd float %80, %81 > %83 = fadd float %82, %24 > %84 = fmul float %25, %59 > %85 = fmul float %26, %65 > %86 = fadd float %84, %85 > %87 = fmul float %27, %71 > %88 = fadd float %86, %87 > %89 = fadd float %88, %28 > %90 = fmul float %29, %59 > %91 = fmul float %30, %65 > %92 = fadd float %90, %91 > %93 = fmul float %31, %71 > %94 = fadd float %92, %93 > %95 = fadd float %94, %32 > %96 = bitcast i32 %11 to float > %97 = insertvalue <{ float, float, float }> undef, float %96, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %77, float %83, float %89, float %95) > ret <{ float, float, float }> %97 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL 
SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], SHADOW2D_ARRAY, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..23] >DCL TEMP[0..16], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.0000, 1.0000} >IMM[1] UINT32 {1, 272, 0, 64} >IMM[2] UINT32 {80, 96, 336, 368} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 0.5000, -0.5000, 0.2500, 2.0000} >IMM[5] UINT32 {256, 288, 0, 0} >IMM[6] FLT32 { -1.0000, 4096.0000, 0.0040, 2.0040} >IMM[7] FLT32 { 0.1250, 0.5098, 0.1500, 0.2500} >IMM[8] FLT32 { 0.2500, 1.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: ADD TEMP[3].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 17: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[2][0].xxxx > 18: DP3 TEMP[2].x, CONST[1][4].xyzz, TEMP[3].xyzz > 19: DP3 TEMP[4].x, CONST[1][5].xyzz, TEMP[3].xyzz > 20: MOV TEMP[2].y, TEMP[4].xxxx > 21: DP3 TEMP[4].x, CONST[1][6].xyzz, TEMP[3].xyzz > 22: MOV TEMP[2].z, TEMP[4].xxxx > 23: DP3 TEMP[5].x, -TEMP[2].xyzz, -TEMP[2].xyzz > 24: SQRT TEMP[5].x, TEMP[5].xxxx > 25: FSNE TEMP[6].x, CONST[2][21].xxxx, IMM[0].xxxx > 26: UIF TEMP[6].xxxx :0 > 27: RCP TEMP[6].x, CONST[2][21].xxxx > 28: MUL TEMP[6].x, TEMP[5].xxxx, TEMP[6].xxxx > 29: ELSE :0 > 30: SSG TEMP[7].x, TEMP[5].xxxx > 31: MUL TEMP[6].x, IMM[0].yyyy, TEMP[7].xxxx > 32: ENDIF > 33: FSEQ TEMP[7].xyz, TEMP[5].xxxx, IMM[0].xxxx > 34: SSG TEMP[8].xyz, -TEMP[2].xyzz > 35: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 36: RCP TEMP[5].xyz, TEMP[5].xxxx > 37: MUL TEMP[5].xyz, -TEMP[2].xyzz, TEMP[5].xyzz > 38: UCMP TEMP[5].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[5].xyzz > 39: FSGE TEMP[4].x, IMM[0].zzzz, TEMP[4].xxxx > 40: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx > 41: INEG TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[7].x, TEMP[5].zzzz, IMM[0].wwww > 43: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 44: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[0].xxxx > 45: SSG TEMP[9].xy, TEMP[5].xyyy > 46: MUL TEMP[9].xy, IMM[0].yyyy, TEMP[9].xyyy > 47: RCP TEMP[7].xy, TEMP[7].xxxx > 48: MUL TEMP[7].xy, TEMP[5].xyyy, TEMP[7].xyyy > 49: UCMP TEMP[7].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[7].xyyy > 50: ADD TEMP[2].xy, TEMP[7].xyyy, IMM[4].xxxx > 51: ADD TEMP[7].x, -TEMP[5].zzzz, IMM[0].wwww > 52: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 53: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[0].xxxx > 54: SSG TEMP[9].xy, TEMP[5].xyyy > 55: MUL TEMP[9].xy, IMM[0].yyyy, TEMP[9].xyyy > 56: RCP TEMP[7].xy, TEMP[7].xxxx > 57: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[7].xyyy > 58: UCMP TEMP[5].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[5].xyyy > 59: ADD TEMP[3].xy, TEMP[5].xyyy, IMM[4].xxxx > 60: ADD TEMP[5].x, -TEMP[2].yyyy, IMM[0].wwww > 61: MOV TEMP[2].z, TEMP[5].xxxx > 62: MOV TEMP[2].w, IMM[0].xxxx > 63: ADD TEMP[5].x, -TEMP[3].yyyy, IMM[0].wwww > 64: MOV TEMP[3].z, TEMP[5].xxxx > 65: 
MOV TEMP[3].w, IMM[0].wwww > 66: USNE TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 67: UIF TEMP[5].xxxx :0 > 68: MOV TEMP[5].x, TEMP[2].xxxx > 69: ELSE :0 > 70: MOV TEMP[5].x, TEMP[3].xxxx > 71: ENDIF > 72: MOV TEMP[5].x, TEMP[5].xxxx > 73: USNE TEMP[7].x, TEMP[4].xxxx, IMM[1].zzzz > 74: UIF TEMP[7].xxxx :0 > 75: MOV TEMP[7].x, TEMP[2].zzzz > 76: ELSE :0 > 77: MOV TEMP[7].x, TEMP[3].zzzz > 78: ENDIF > 79: MOV TEMP[5].y, TEMP[7].xxxx > 80: USNE TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz > 81: UIF TEMP[4].xxxx :0 > 82: MOV TEMP[4].x, TEMP[2].wwww > 83: ELSE :0 > 84: MOV TEMP[4].x, TEMP[3].wwww > 85: ENDIF > 86: MOV TEMP[5].z, TEMP[4].xxxx > 87: ADD TEMP[4].x, -TEMP[6].xxxx, IMM[0].wwww > 88: FMA TEMP[6], CONST[2][23].zwzw, IMM[4].yyxy, TEMP[5].xyxy > 89: MOV TEMP[7].xy, TEMP[6].xyxx > 90: MOV TEMP[7].z, TEMP[5].zzzz > 91: MOV TEMP[8].xyz, TEMP[7].xyzz > 92: MOV TEMP[8].w, TEMP[4].xxxx > 93: TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D_ARRAY > 94: MOV TEMP[7].xy, TEMP[6].zwzz > 95: MOV TEMP[6].xyz, TEMP[7].xyzz > 96: MOV TEMP[6].w, TEMP[4].xxxx > 97: TEX TEMP[6].x, TEMP[6], SAMP[1], SHADOW2D_ARRAY > 98: FMA TEMP[5], CONST[2][23].zwzw, IMM[4].yxxx, TEMP[5].xyxy > 99: MOV TEMP[7].xy, TEMP[5].xyxx >100: MOV TEMP[9].xyz, TEMP[7].xyzz >101: MOV TEMP[9].w, TEMP[4].xxxx >102: TEX TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D_ARRAY >103: MOV TEMP[7].xy, TEMP[5].zwzz >104: MOV TEMP[5].xyz, TEMP[7].xyzz >105: MOV TEMP[5].w, TEMP[4].xxxx >106: TEX TEMP[4].x, TEMP[5], SAMP[1], SHADOW2D_ARRAY >107: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[6].xxxx >108: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx >109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx >110: FMA TEMP[4].x, -TEMP[4].xxxx, IMM[4].zzzz, IMM[0].wwww >111: ADD TEMP[5].x, -CONST[2][16].wwww, IMM[0].wwww >112: FMA TEMP[4].x, CONST[2][16].wwww, TEMP[4].xxxx, TEMP[5].xxxx >113: DP3 TEMP[3].x, CONST[1][4].xyzz, TEMP[1].xyzz >114: DP3 TEMP[5].x, CONST[1][5].xyzz, TEMP[1].xyzz >115: MOV TEMP[3].y, TEMP[5].xxxx >116: DP3 TEMP[5].x, CONST[1][6].xyzz, TEMP[1].xyzz >117: MOV TEMP[3].z, TEMP[5].xxxx >118: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz >119: SQRT TEMP[6].x, TEMP[5].xxxx >120: MOV TEMP[1].y, TEMP[6].xxxx >121: MOV TEMP[1].x, IMM[0].xxxx >122: MOV TEMP[1].xy, TEMP[1].xyyy >123: MOV TEMP[1].w, IMM[0].xxxx >124: TXL TEMP[1].xyz, TEMP[1], SAMP[2], 2D >125: MOV TEMP[6].xy, TEMP[0].xyyy >126: TEX TEMP[6], TEMP[6], SAMP[3], 2D >127: MOV TEMP[2].xyz, TEMP[6] >128: MUL TEMP[7].xyz, TEMP[6].xyzz, CONST[2][16].xyzz >129: DP3 TEMP[8].x, IN[2].xyzz, IN[2].xyzz >130: RSQ TEMP[8].x, TEMP[8].xxxx >131: MOV TEMP[9].xy, TEMP[0].xyyy >132: TEX TEMP[9], TEMP[9], SAMP[4], 2D >133: FMA TEMP[10].xyz, TEMP[9].xyzz, IMM[4].wwww, IMM[6].xxxx >134: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz >135: RSQ TEMP[11].x, TEMP[11].xxxx >136: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz >137: RSQ TEMP[5].x, TEMP[5].xxxx >138: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[3].xyzz >139: MOV TEMP[5].xy, TEMP[0].xyyy >140: TEX TEMP[5], TEMP[5], SAMP[5], 2D >141: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx >142: MOV TEMP[12].xy, TEMP[0].xyyy >143: MOV TEMP[12].w, IMM[0].xxxx >144: TXL TEMP[12].xy, TEMP[12], SAMP[6], 2D >145: FMA TEMP[13].xyz, IN[2].xyzz, TEMP[8].xxxx, TEMP[3].xyzz >146: DP3 TEMP[14].x, TEMP[13].xyzz, TEMP[13].xyzz >147: RSQ TEMP[14].x, TEMP[14].xxxx >148: MUL TEMP[13].xyz, TEMP[14].xxxx, TEMP[13].xyzz >149: DP3 TEMP[14].x, TEMP[10].xyzz, TEMP[13].xyzz >150: MOV_SAT TEMP[14].x, TEMP[14].xxxx >151: FMA TEMP[11].xy, TEMP[11].xxxx, IMM[6].yyyy, IMM[6].zwww >152: MUL TEMP[15].x, TEMP[11].yyyy, IMM[7].xxxx >153: LG2 
TEMP[14].x, TEMP[14].xxxx >154: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[11].xxxx >155: EX2 TEMP[11].x, TEMP[11].xxxx >156: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[11].xxxx >157: ADD TEMP[14].x, -TEMP[5].zzzz, IMM[0].wwww >158: DP3 TEMP[15].x, TEMP[3].xyzz, TEMP[13].xyzz >159: MOV_SAT TEMP[15].x, TEMP[15].xxxx >160: ADD TEMP[15].x, -TEMP[15].xxxx, IMM[0].wwww >161: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx >162: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[16].xxxx >163: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[16].xxxx >164: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx, TEMP[5].zzzz >165: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx >166: MUL TEMP[13].xyz, TEMP[12].xxxx, CONST[2][18].xyzz >167: MUL TEMP[8].xyz, TEMP[5].yyyy, TEMP[13].xyzz >168: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[3].xyzz >169: MOV TEMP[0].x, TEMP[10].xxxx >170: ADD TEMP[3].x, TEMP[6].wwww, TEMP[10].xxxx >171: ADD TEMP[3].x, TEMP[3].xxxx, IMM[6].xxxx >172: FSNE TEMP[13].x, TEMP[6].wwww, IMM[0].xxxx >173: UIF TEMP[13].xxxx :0 >174: RCP TEMP[13].x, TEMP[6].wwww >175: MUL TEMP[13].x, TEMP[3].xxxx, TEMP[13].xxxx >176: ELSE :0 >177: SSG TEMP[14].x, TEMP[3].xxxx >178: MUL TEMP[13].x, IMM[0].yyyy, TEMP[14].xxxx >179: ENDIF >180: FMA TEMP[8].xyz, TEMP[8].xyzz, TEMP[11].xxxx, TEMP[7].xyzz >181: MOV_SAT TEMP[11].x, TEMP[13].xxxx >182: MUL TEMP[3].xyz, TEMP[11].xxxx, TEMP[8].xyzz >183: FSLT TEMP[8].x, IMM[0].xxxx, TEMP[5].wwww >184: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >185: INEG TEMP[8].x, TEMP[8].xxxx >186: USNE TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz >187: UIF TEMP[8].xxxx :0 >188: FSLT TEMP[8].x, TEMP[9].wwww, IMM[7].yyyy >189: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >190: INEG TEMP[8].x, TEMP[8].xxxx >191: ADD TEMP[7].xyz, TEMP[7].xyzz, TEMP[7].xyzz >192: MAX TEMP[9].x, TEMP[6].zzzz, TEMP[6].yyyy >193: MAX TEMP[9].x, TEMP[9].xxxx, TEMP[6].xxxx >194: FSEQ TEMP[11].xyz, TEMP[9].xxxx, IMM[0].xxxx >195: SSG TEMP[13].xyz, TEMP[6].xyzz >196: MUL TEMP[13].xyz, IMM[0].yyyy, TEMP[13].xyzz >197: RCP TEMP[9].xyz, TEMP[9].xxxx >198: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[9].xyzz >199: UCMP TEMP[6].xyz, TEMP[11].xyzz, TEMP[13].xyzz, TEMP[6].xyzz >200: MOV_SAT TEMP[6].xyz, TEMP[6].xyzz >201: MOV_SAT TEMP[9].xyz, CONST[2][16].xyzz >202: MUL TEMP[2].xyz, TEMP[6].xyzz, TEMP[6].xyzz >203: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xyzz >204: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[7].zzzz >205: USNE TEMP[6].x, TEMP[8].xxxx, IMM[1].zzzz >206: UIF TEMP[6].xxxx :0 >207: MOV TEMP[6].x, TEMP[7].xxxx >208: ELSE :0 >209: MOV TEMP[6].x, TEMP[2].xxxx >210: ENDIF >211: MOV TEMP[6].x, TEMP[6].xxxx >212: USNE TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >213: UIF TEMP[9].xxxx :0 >214: MOV TEMP[9].x, TEMP[7].yyyy >215: ELSE :0 >216: MOV TEMP[9].x, TEMP[2].yyyy >217: ENDIF >218: MOV TEMP[6].y, TEMP[9].xxxx >219: USNE TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz >220: UIF TEMP[8].xxxx :0 >221: MOV TEMP[8].x, TEMP[7].zzzz >222: ELSE :0 >223: MOV TEMP[8].x, TEMP[2].zzzz >224: ENDIF >225: MOV TEMP[6].z, TEMP[8].xxxx >226: ADD TEMP[8].x, TEMP[5].wwww, IMM[4].yyyy >227: MOV_SAT TEMP[8].x, TEMP[8].xxxx >228: MUL TEMP[7].xyz, TEMP[8].xxxx, TEMP[6].xyzz >229: ADD TEMP[8].xy, -TEMP[10].xxxx, IMM[8].xyyy >230: MOV_SAT TEMP[8].xy, TEMP[8].xyyy >231: FMA TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx, TEMP[3].xyzz >232: MIN TEMP[5].x, TEMP[5].wwww, IMM[4].xxxx >233: MUL TEMP[2].xyz, TEMP[5].xxxx, TEMP[6].xyzz >234: MUL TEMP[2].xyz, TEMP[8].yyyy, TEMP[2].xyzz >235: ADD TEMP[5].x, TEMP[10].xxxx, IMM[4].zzzz >236: MOV_SAT TEMP[0].x, TEMP[5].xxxx >237: FMA TEMP[3].xyz, TEMP[2].xyzz, TEMP[0].xxxx, TEMP[7].xyzz >238: ENDIF >239: 
MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[1].xyzz >240: MUL TEMP[0].xyz, TEMP[4].xxxx, TEMP[3].xyzz >241: MUL TEMP[0].xyz, TEMP[12].yyyy, TEMP[0].xyzz >242: MOV TEMP[0].w, IMM[0].wwww >243: MOV OUT[0], TEMP[0] >244: END >radeonsi: Compiling shader 71 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 268) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 336) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 376) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 380) > %50 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 > %52 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %53 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %52, i64 0, i64 3 > %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 > %55 = extractelement <8 x i32> %51, i32 7 > %56 = extractelement <4 x i32> %54, i32 0 > %57 = and i32 %56, %55 > %58 = insertelement <4 x i32> %54, i32 %57, i32 0 > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 7 > 
%63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 > %70 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %71 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %70, i64 0, i64 11 > %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 > %73 = extractelement <8 x i32> %69, i32 7 > %74 = extractelement <4 x i32> %72, i32 0 > %75 = and i32 %74, %73 > %76 = insertelement <4 x i32> %72, i32 %75, i32 0 > %77 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %78 = load <8 x i32>, <8 x i32> addrspace(2)* %77, align 32, !tbaa !0 > %79 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %79, i64 0, i64 15 > %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 > %82 = extractelement <8 x i32> %78, i32 7 > %83 = extractelement <4 x i32> %81, i32 0 > %84 = and i32 %83, %82 > %85 = insertelement <4 x i32> %81, i32 %84, i32 0 > %86 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 > %88 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %89 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %88, i64 0, i64 19 > %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 > %91 = extractelement <8 x i32> %87, i32 7 > %92 = extractelement <4 x i32> %90, i32 0 > %93 = and i32 %92, %91 > %94 = insertelement <4 x i32> %90, i32 %93, i32 0 > %95 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 > %97 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %98 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %97, i64 0, i64 23 > %99 = load <4 x i32>, <4 x i32> addrspace(2)* %98, align 16, !tbaa !0 > %100 = extractelement <8 x i32> %96, i32 7 > %101 = extractelement <4 x i32> %99, i32 0 > %102 = and i32 %101, %100 > %103 = insertelement <4 x i32> %99, i32 %102, i32 0 > %104 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %105 = load <8 x i32>, <8 x i32> addrspace(2)* %104, align 32, !tbaa !0 > %106 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %107 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %106, i64 0, i64 27 > %108 = load <4 x i32>, <4 x i32> addrspace(2)* %107, align 16, !tbaa !0 > %109 = extractelement <8 x i32> %105, i32 7 > %110 = extractelement <4 x i32> %108, i32 0 > %111 = and i32 %110, %109 > %112 = insertelement <4 x i32> %108, i32 %111, i32 0 > %113 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %114 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %115 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %116 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %117 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > 
%119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %120 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %121 = fcmp oeq float %115, 0.000000e+00 > %122 = fcmp oeq float %115, 0.000000e+00 > %123 = fcmp ogt float %113, 0.000000e+00 > %124 = select i1 %123, float 1.000000e+00, float %113 > %125 = fcmp oge float %124, 0.000000e+00 > %126 = fcmp ogt float %114, 0.000000e+00 > %127 = select i1 %126, float 1.000000e+00, float %114 > %128 = fcmp oge float %127, 0.000000e+00 > %.op = fmul float %124, 0x4600000000000000 > %129 = select i1 %125, float %.op, float 0xC600000000000000 > %.op95 = fmul float %127, 0x4600000000000000 > %130 = select i1 %128, float %.op95, float 0xC600000000000000 > %131 = fdiv float 1.000000e+00, %115 > %132 = fmul float %113, %131 > %133 = fmul float %114, %131 > %134 = select i1 %121, float %129, float %132 > %135 = select i1 %122, float %130, float %133 > %136 = fcmp oeq float %115, 0.000000e+00 > %137 = fcmp oeq float %115, 0.000000e+00 > %138 = fcmp ogt float %116, 0.000000e+00 > %139 = select i1 %138, float 1.000000e+00, float %116 > %140 = fcmp oge float %139, 0.000000e+00 > %141 = fcmp ogt float %117, 0.000000e+00 > %142 = select i1 %141, float 1.000000e+00, float %117 > %143 = fcmp oge float %142, 0.000000e+00 > %.op96 = fmul float %139, 0x4600000000000000 > %144 = select i1 %140, float %.op96, float 0xC600000000000000 > %.op97 = fmul float %142, 0x4600000000000000 > %145 = select i1 %143, float %.op97, float 0xC600000000000000 > %146 = fdiv float 1.000000e+00, %115 > %147 = fmul float %116, %146 > %148 = fmul float %117, %146 > %149 = select i1 %136, float %144, float %147 > %150 = select i1 %137, float %145, float %148 > %151 = bitcast float %134 to i32 > %152 = bitcast float %135 to i32 > %153 = insertelement <2 x i32> undef, i32 %151, i32 0 > %154 = insertelement <2 x i32> %153, i32 %152, i32 1 > %155 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %154, <8 x i32> %51, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %156 = extractelement <4 x float> %155, i32 0 > %157 = fmul float %149, %156 > %158 = fmul float %150, %156 > %159 = fsub float %41, %157 > %160 = fsub float %42, %158 > %161 = fsub float %43, %156 > %162 = fmul float %159, %36 > %163 = fmul float %160, %36 > %164 = fmul float %161, %36 > %165 = fmul float %25, %159 > %166 = fmul float %26, %160 > %167 = fadd float %166, %165 > %168 = fmul float %27, %161 > %169 = fadd float %167, %168 > %170 = fmul float %28, %159 > %171 = fmul float %29, %160 > %172 = fadd float %171, %170 > %173 = fmul float %30, %161 > %174 = fadd float %172, %173 > %175 = fmul float %31, %159 > %176 = fmul float %32, %160 > %177 = fadd float %176, %175 > %178 = fmul float %33, %161 > %179 = fadd float %177, %178 > %180 = fmul float %169, %169 > %181 = fmul float %174, %174 > %182 = fadd float %181, %180 > %183 = fmul float %179, %179 > %184 = fadd float %182, %183 > %185 = call float @llvm.sqrt.f32(float %184) > %186 = fcmp une float %47, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %47 > %188 = fmul float %185, %187 > br label %ENDIF > >ELSE: ; preds = %main_body > %189 = fcmp ogt float %185, 0.000000e+00 > %190 = select i1 %189, float 1.000000e+00, float %185 > %191 = fcmp oge float %190, 0.000000e+00 > %.op98 = fmul float %190, 0x4600000000000000 > %192 = select i1 %191, float %.op98, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp24.0 = phi 
float [ %188, %IF ], [ %192, %ELSE ] > %193 = fcmp oeq float %185, 0.000000e+00 > %194 = fcmp oeq float %185, 0.000000e+00 > %195 = fcmp oeq float %185, 0.000000e+00 > %196 = fsub float -0.000000e+00, %169 > %197 = fcmp olt float %169, -0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %196 > %199 = fcmp oge float %198, 0.000000e+00 > %200 = fsub float -0.000000e+00, %174 > %201 = fcmp olt float %174, -0.000000e+00 > %202 = select i1 %201, float 1.000000e+00, float %200 > %203 = fcmp oge float %202, 0.000000e+00 > %204 = fsub float -0.000000e+00, %179 > %205 = fcmp olt float %179, -0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %204 > %207 = fcmp oge float %206, 0.000000e+00 > %.op99 = fmul float %198, 0x4600000000000000 > %208 = select i1 %199, float %.op99, float 0xC600000000000000 > %.op100 = fmul float %202, 0x4600000000000000 > %209 = select i1 %203, float %.op100, float 0xC600000000000000 > %.op101 = fmul float %206, 0x4600000000000000 > %210 = select i1 %207, float %.op101, float 0xC600000000000000 > %211 = fdiv float 1.000000e+00, %185 > %212 = fmul float %169, %211 > %213 = fsub float -0.000000e+00, %212 > %214 = fmul float %174, %211 > %215 = fsub float -0.000000e+00, %214 > %216 = fmul float %179, %211 > %217 = fsub float -0.000000e+00, %216 > %218 = select i1 %193, float %208, float %213 > %219 = select i1 %194, float %209, float %215 > %220 = select i1 %195, float %210, float %217 > %221 = fcmp ole float %179, -0.000000e+00 > %222 = fadd float %220, 1.000000e+00 > %223 = fadd float %222, %222 > %224 = fcmp oeq float %223, 0.000000e+00 > %225 = fcmp oeq float %223, 0.000000e+00 > %226 = fcmp ogt float %218, 0.000000e+00 > %227 = select i1 %226, float 1.000000e+00, float %218 > %228 = fcmp oge float %227, 0.000000e+00 > %229 = fcmp ogt float %219, 0.000000e+00 > %230 = select i1 %229, float 1.000000e+00, float %219 > %231 = fcmp oge float %230, 0.000000e+00 > %.op102 = fmul float %227, 0x4600000000000000 > %232 = select i1 %228, float %.op102, float 0xC600000000000000 > %.op103 = fmul float %230, 0x4600000000000000 > %233 = select i1 %231, float %.op103, float 0xC600000000000000 > %234 = fdiv float 1.000000e+00, %223 > %235 = fmul float %218, %234 > %236 = fmul float %219, %234 > %237 = select i1 %224, float %232, float %235 > %238 = select i1 %225, float %233, float %236 > %239 = fsub float 1.000000e+00, %220 > %240 = fadd float %239, %239 > %241 = fcmp oeq float %240, 0.000000e+00 > %242 = fcmp oeq float %240, 0.000000e+00 > %243 = fcmp ogt float %218, 0.000000e+00 > %244 = select i1 %243, float 1.000000e+00, float %218 > %245 = fcmp oge float %244, 0.000000e+00 > %246 = fcmp ogt float %219, 0.000000e+00 > %247 = select i1 %246, float 1.000000e+00, float %219 > %248 = fcmp oge float %247, 0.000000e+00 > %.op104 = fmul float %244, 0x4600000000000000 > %249 = select i1 %245, float %.op104, float 0xC600000000000000 > %.op105 = fmul float %247, 0x4600000000000000 > %250 = select i1 %248, float %.op105, float 0xC600000000000000 > %251 = fdiv float 1.000000e+00, %240 > %252 = fmul float %218, %251 > %253 = fmul float %219, %251 > %254 = select i1 %241, float %249, float %252 > %255 = select i1 %242, float %250, float %253 > %..v = select i1 %221, float %237, float %254 > %. 
= fadd float %..v, 5.000000e-01 > %temp28.0.v.v = select i1 %221, float %238, float %255 > %temp28.0.v = fadd float %temp28.0.v.v, 5.000000e-01 > %temp28.0 = fsub float 1.000000e+00, %temp28.0.v > %.92 = select i1 %221, float 0.000000e+00, float 1.000000e+00 > %256 = fsub float 1.000000e+00, %temp24.0 > %257 = call float @llvm.fma.f32(float %48, float -5.000000e-01, float %.) > %258 = call float @llvm.fma.f32(float %49, float -5.000000e-01, float %temp28.0) > %259 = call float @llvm.fma.f32(float %48, float 5.000000e-01, float %.) > %260 = call float @llvm.fma.f32(float %49, float -5.000000e-01, float %temp28.0) > %261 = bitcast float %256 to i32 > %262 = bitcast float %257 to i32 > %263 = bitcast float %258 to i32 > %264 = bitcast float %.92 to i32 > %265 = insertelement <4 x i32> undef, i32 %261, i32 0 > %266 = insertelement <4 x i32> %265, i32 %262, i32 1 > %267 = insertelement <4 x i32> %266, i32 %263, i32 2 > %268 = insertelement <4 x i32> %267, i32 %264, i32 3 > %269 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %268, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %270 = extractelement <4 x float> %269, i32 0 > %271 = bitcast float %256 to i32 > %272 = bitcast float %259 to i32 > %273 = bitcast float %260 to i32 > %274 = bitcast float %.92 to i32 > %275 = insertelement <4 x i32> undef, i32 %271, i32 0 > %276 = insertelement <4 x i32> %275, i32 %272, i32 1 > %277 = insertelement <4 x i32> %276, i32 %273, i32 2 > %278 = insertelement <4 x i32> %277, i32 %274, i32 3 > %279 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %278, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %280 = extractelement <4 x float> %279, i32 0 > %281 = call float @llvm.fma.f32(float %48, float -5.000000e-01, float %.) > %282 = call float @llvm.fma.f32(float %49, float 5.000000e-01, float %temp28.0) > %283 = call float @llvm.fma.f32(float %48, float 5.000000e-01, float %.) 
> %284 = call float @llvm.fma.f32(float %49, float 5.000000e-01, float %temp28.0) > %285 = bitcast float %256 to i32 > %286 = bitcast float %281 to i32 > %287 = bitcast float %282 to i32 > %288 = bitcast float %.92 to i32 > %289 = insertelement <4 x i32> undef, i32 %285, i32 0 > %290 = insertelement <4 x i32> %289, i32 %286, i32 1 > %291 = insertelement <4 x i32> %290, i32 %287, i32 2 > %292 = insertelement <4 x i32> %291, i32 %288, i32 3 > %293 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %292, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %294 = extractelement <4 x float> %293, i32 0 > %295 = bitcast float %256 to i32 > %296 = bitcast float %283 to i32 > %297 = bitcast float %284 to i32 > %298 = bitcast float %.92 to i32 > %299 = insertelement <4 x i32> undef, i32 %295, i32 0 > %300 = insertelement <4 x i32> %299, i32 %296, i32 1 > %301 = insertelement <4 x i32> %300, i32 %297, i32 2 > %302 = insertelement <4 x i32> %301, i32 %298, i32 3 > %303 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %302, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %304 = extractelement <4 x float> %303, i32 0 > %305 = fadd float %270, %280 > %306 = fadd float %294, %305 > %307 = fadd float %304, %306 > %308 = fsub float -0.000000e+00, %307 > %309 = call float @llvm.fma.f32(float %308, float 2.500000e-01, float 1.000000e+00) > %310 = fsub float 1.000000e+00, %40 > %311 = call float @llvm.fma.f32(float %40, float %309, float %310) > %312 = fmul float %25, %162 > %313 = fmul float %26, %163 > %314 = fadd float %313, %312 > %315 = fmul float %27, %164 > %316 = fadd float %314, %315 > %317 = fmul float %28, %162 > %318 = fmul float %29, %163 > %319 = fadd float %318, %317 > %320 = fmul float %30, %164 > %321 = fadd float %319, %320 > %322 = fmul float %31, %162 > %323 = fmul float %32, %163 > %324 = fadd float %323, %322 > %325 = fmul float %33, %164 > %326 = fadd float %324, %325 > %327 = fmul float %316, %316 > %328 = fmul float %321, %321 > %329 = fadd float %328, %327 > %330 = fmul float %326, %326 > %331 = fadd float %329, %330 > %332 = call float @llvm.sqrt.f32(float %331) > %333 = bitcast float %332 to i32 > %334 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %333, i32 1 > %335 = insertelement <4 x i32> %334, i32 0, i32 2 > %336 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %335, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %337 = extractelement <4 x float> %336, i32 0 > %338 = extractelement <4 x float> %336, i32 1 > %339 = extractelement <4 x float> %336, i32 2 > %340 = bitcast float %134 to i32 > %341 = bitcast float %135 to i32 > %342 = insertelement <2 x i32> undef, i32 %340, i32 0 > %343 = insertelement <2 x i32> %342, i32 %341, i32 1 > %344 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %343, <8 x i32> %78, <4 x i32> %85, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %345 = extractelement <4 x float> %344, i32 0 > %346 = extractelement <4 x float> %344, i32 1 > %347 = extractelement <4 x float> %344, i32 2 > %348 = extractelement <4 x float> %344, i32 3 > %349 = fmul float %345, %37 > %350 = fmul float %346, %38 > %351 = fmul float %347, %39 > %352 = fmul float %118, %118 > %353 = fmul float %119, %119 > %354 = fadd float %353, %352 > %355 = fmul float %120, %120 > %356 = fadd float %354, %355 > %357 = call float @llvm.AMDGPU.rsq.clamped.f32(float %356) > %358 = bitcast float %134 to i32 
> %359 = bitcast float %135 to i32 > %360 = insertelement <2 x i32> undef, i32 %358, i32 0 > %361 = insertelement <2 x i32> %360, i32 %359, i32 1 > %362 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %361, <8 x i32> %87, <4 x i32> %94, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %363 = extractelement <4 x float> %362, i32 0 > %364 = extractelement <4 x float> %362, i32 1 > %365 = extractelement <4 x float> %362, i32 2 > %366 = extractelement <4 x float> %362, i32 3 > %367 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float -1.000000e+00) > %368 = call float @llvm.fma.f32(float %364, float 2.000000e+00, float -1.000000e+00) > %369 = call float @llvm.fma.f32(float %365, float 2.000000e+00, float -1.000000e+00) > %370 = fmul float %367, %367 > %371 = fmul float %368, %368 > %372 = fadd float %371, %370 > %373 = fmul float %369, %369 > %374 = fadd float %372, %373 > %375 = call float @llvm.AMDGPU.rsq.clamped.f32(float %374) > %376 = fmul float %375, %367 > %377 = fmul float %375, %368 > %378 = fmul float %375, %369 > %379 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) > %380 = fmul float %379, %316 > %381 = fmul float %379, %321 > %382 = fmul float %379, %326 > %383 = bitcast float %134 to i32 > %384 = bitcast float %135 to i32 > %385 = insertelement <2 x i32> undef, i32 %383, i32 0 > %386 = insertelement <2 x i32> %385, i32 %384, i32 1 > %387 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %386, <8 x i32> %96, <4 x i32> %103, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %388 = extractelement <4 x float> %387, i32 0 > %389 = extractelement <4 x float> %387, i32 1 > %390 = extractelement <4 x float> %387, i32 2 > %391 = extractelement <4 x float> %387, i32 3 > %392 = fmul float %388, %388 > %393 = bitcast float %134 to i32 > %394 = bitcast float %135 to i32 > %395 = insertelement <4 x i32> undef, i32 %393, i32 0 > %396 = insertelement <4 x i32> %395, i32 %394, i32 1 > %397 = insertelement <4 x i32> %396, i32 0, i32 2 > %398 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %397, <8 x i32> %105, <4 x i32> %112, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %399 = extractelement <4 x float> %398, i32 0 > %400 = extractelement <4 x float> %398, i32 1 > %401 = call float @llvm.fma.f32(float %118, float %357, float %380) > %402 = call float @llvm.fma.f32(float %119, float %357, float %381) > %403 = call float @llvm.fma.f32(float %120, float %357, float %382) > %404 = fmul float %401, %401 > %405 = fmul float %402, %402 > %406 = fadd float %405, %404 > %407 = fmul float %403, %403 > %408 = fadd float %406, %407 > %409 = call float @llvm.AMDGPU.rsq.clamped.f32(float %408) > %410 = fmul float %409, %401 > %411 = fmul float %409, %402 > %412 = fmul float %409, %403 > %413 = fmul float %376, %410 > %414 = fmul float %377, %411 > %415 = fadd float %414, %413 > %416 = fmul float %378, %412 > %417 = fadd float %415, %416 > %418 = call float @llvm.AMDGPU.clamp.(float %417, float 0.000000e+00, float 1.000000e+00) > %419 = call float @llvm.fma.f32(float %392, float 4.096000e+03, float 0x3F70624DE0000000) > %420 = call float @llvm.fma.f32(float %392, float 4.096000e+03, float 0x4000083120000000) > %421 = fmul float %420, 1.250000e-01 > %422 = call float @llvm.log2.f32(float %418) > %423 = fmul float %422, %419 > %424 = call float @llvm.exp2.f32(float %423) > %425 = fmul float %421, %424 > %426 = fsub float 1.000000e+00, %390 > %427 = fmul float %380, %410 > %428 = fmul float %381, %411 > %429 = fadd float %428, %427 > %430 
= fmul float %382, %412 > %431 = fadd float %429, %430 > %432 = call float @llvm.AMDGPU.clamp.(float %431, float 0.000000e+00, float 1.000000e+00) > %433 = fsub float 1.000000e+00, %432 > %434 = fmul float %433, %433 > %435 = fmul float %434, %434 > %436 = fmul float %433, %435 > %437 = call float @llvm.fma.f32(float %426, float %436, float %390) > %438 = fmul float %425, %437 > %439 = fmul float %399, %44 > %440 = fmul float %399, %45 > %441 = fmul float %399, %46 > %442 = fmul float %389, %439 > %443 = fmul float %389, %440 > %444 = fmul float %389, %441 > %445 = fmul float %376, %380 > %446 = fmul float %377, %381 > %447 = fadd float %446, %445 > %448 = fmul float %378, %382 > %449 = fadd float %447, %448 > %450 = fadd float %348, %449 > %451 = fadd float %450, -1.000000e+00 > %452 = fcmp une float %348, 0.000000e+00 > br i1 %452, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF > %453 = fdiv float 1.000000e+00, %348 > %454 = fmul float %451, %453 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF > %455 = fcmp ogt float %451, 0.000000e+00 > %456 = select i1 %455, float 1.000000e+00, float %451 > %457 = fcmp oge float %456, 0.000000e+00 > %.op106 = fmul float %456, 0x4600000000000000 > %458 = select i1 %457, float %.op106, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp52.0 = phi float [ %454, %IF78 ], [ %458, %ELSE79 ] > %459 = call float @llvm.fma.f32(float %442, float %438, float %349) > %460 = call float @llvm.fma.f32(float %443, float %438, float %350) > %461 = call float @llvm.fma.f32(float %444, float %438, float %351) > %462 = call float @llvm.AMDGPU.clamp.(float %temp52.0, float 0.000000e+00, float 1.000000e+00) > %463 = fmul float %462, %459 > %464 = fmul float %462, %460 > %465 = fmul float %462, %461 > %466 = fcmp ogt float %391, 0.000000e+00 > br i1 %466, label %IF81, label %ENDIF80 > >IF81: ; preds = %ENDIF77 > %467 = fcmp olt float %366, 0x3FE0505060000000 > %468 = fadd float %349, %349 > %469 = fadd float %350, %350 > %470 = fadd float %351, %351 > %471 = call float @llvm.maxnum.f32(float %347, float %346) > %472 = call float @llvm.maxnum.f32(float %471, float %345) > %473 = fcmp oeq float %472, 0.000000e+00 > %474 = fcmp oeq float %472, 0.000000e+00 > %475 = fcmp oeq float %472, 0.000000e+00 > %476 = fcmp ogt float %345, 0.000000e+00 > %477 = select i1 %476, float 1.000000e+00, float %345 > %478 = fcmp oge float %477, 0.000000e+00 > %479 = fcmp ogt float %346, 0.000000e+00 > %480 = select i1 %479, float 1.000000e+00, float %346 > %481 = fcmp oge float %480, 0.000000e+00 > %482 = fcmp ogt float %347, 0.000000e+00 > %483 = select i1 %482, float 1.000000e+00, float %347 > %484 = fcmp oge float %483, 0.000000e+00 > %.op107 = fmul float %477, 0x4600000000000000 > %485 = select i1 %478, float %.op107, float 0xC600000000000000 > %.op108 = fmul float %480, 0x4600000000000000 > %486 = select i1 %481, float %.op108, float 0xC600000000000000 > %.op109 = fmul float %483, 0x4600000000000000 > %487 = select i1 %484, float %.op109, float 0xC600000000000000 > %488 = fdiv float 1.000000e+00, %472 > %489 = fmul float %345, %488 > %490 = fmul float %346, %488 > %491 = fmul float %347, %488 > %492 = select i1 %473, float %485, float %489 > %493 = select i1 %474, float %486, float %490 > %494 = select i1 %475, float %487, float %491 > %495 = call float @llvm.AMDGPU.clamp.(float %492, float 0.000000e+00, float 1.000000e+00) > %496 = call float @llvm.AMDGPU.clamp.(float %493, float 0.000000e+00, float 1.000000e+00) > %497 = call float 
@llvm.AMDGPU.clamp.(float %494, float 0.000000e+00, float 1.000000e+00) > %498 = call float @llvm.AMDGPU.clamp.(float %37, float 0.000000e+00, float 1.000000e+00) > %499 = call float @llvm.AMDGPU.clamp.(float %38, float 0.000000e+00, float 1.000000e+00) > %500 = call float @llvm.AMDGPU.clamp.(float %39, float 0.000000e+00, float 1.000000e+00) > %501 = fmul float %495, %495 > %502 = fmul float %496, %496 > %503 = fmul float %497, %497 > %504 = fmul float %501, %498 > %505 = fmul float %502, %499 > %506 = fmul float %503, %500 > %507 = fmul float %504, 0x3FC3333340000000 > %508 = fmul float %505, 0x3FC3333340000000 > %509 = fmul float %506, 0x3FC3333340000000 > %.93 = select i1 %467, float %468, float %507 > %temp36.0 = select i1 %467, float %469, float %508 > %.94 = select i1 %467, float %470, float %509 > %510 = fadd float %391, -5.000000e-01 > %511 = call float @llvm.AMDGPU.clamp.(float %510, float 0.000000e+00, float 1.000000e+00) > %512 = fmul float %511, %.93 > %513 = fmul float %511, %temp36.0 > %514 = fmul float %511, %.94 > %515 = fsub float 2.500000e-01, %449 > %516 = fsub float 1.000000e+00, %449 > %517 = call float @llvm.AMDGPU.clamp.(float %515, float 0.000000e+00, float 1.000000e+00) > %518 = call float @llvm.AMDGPU.clamp.(float %516, float 0.000000e+00, float 1.000000e+00) > %519 = call float @llvm.fma.f32(float %512, float %517, float %463) > %520 = call float @llvm.fma.f32(float %513, float %517, float %464) > %521 = call float @llvm.fma.f32(float %514, float %517, float %465) > %522 = call float @llvm.minnum.f32(float %391, float 5.000000e-01) > %523 = fmul float %522, %.93 > %524 = fmul float %522, %temp36.0 > %525 = fmul float %522, %.94 > %526 = fmul float %518, %523 > %527 = fmul float %518, %524 > %528 = fmul float %518, %525 > %529 = fadd float %449, 2.500000e-01 > %530 = call float @llvm.AMDGPU.clamp.(float %529, float 0.000000e+00, float 1.000000e+00) > %531 = call float @llvm.fma.f32(float %526, float %530, float %519) > %532 = call float @llvm.fma.f32(float %527, float %530, float %520) > %533 = call float @llvm.fma.f32(float %528, float %530, float %521) > br label %ENDIF80 > >ENDIF80: ; preds = %ENDIF77, %IF81 > %temp12.0 = phi float [ %531, %IF81 ], [ %463, %ENDIF77 ] > %temp13.0 = phi float [ %532, %IF81 ], [ %464, %ENDIF77 ] > %temp14.0 = phi float [ %533, %IF81 ], [ %465, %ENDIF77 ] > %534 = fmul float %temp12.0, %337 > %535 = fmul float %temp13.0, %338 > %536 = fmul float %temp14.0, %339 > %537 = fmul float %311, %534 > %538 = fmul float %311, %535 > %539 = fmul float %311, %536 > %540 = fmul float %400, %537 > %541 = fmul float %400, %538 > %542 = fmul float %400, %539 > %543 = bitcast float %5 to i32 > %544 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %543, 10 > %545 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %544, float %540, 11 > %546 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %545, float %541, 12 > %547 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %546, float %542, 13 > %548 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %547, float 1.000000e+00, 14 > %549 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %548, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %549 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..57] >DCL CONST[2][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 336, 848, 864} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 912, 1, 384} >IMM[4] UINT32 {192, 208, 224, 0} > 0: MOV TEMP[0].w, IMM[0].xxxx > 1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[1][21].xyzz > 2: MOV TEMP[1].w, IMM[0].xxxx > 3: DP4 TEMP[0].x, CONST[1][53], TEMP[1] > 4: DP4 TEMP[2].x, CONST[1][54], TEMP[1] > 5: MOV TEMP[0].y, TEMP[2].xxxx > 6: DP4 TEMP[2].x, CONST[1][55], TEMP[1] > 7: MOV TEMP[0].z, TEMP[2].xxxx > 8: DP4 TEMP[1].x, CONST[1][46], TEMP[0] > 9: DP4 TEMP[2].x, CONST[1][47], TEMP[0] > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[0] > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][49], TEMP[0] > 14: MOV TEMP[1].w, TEMP[2].xxxx > 15: ADD TEMP[0].xyz, -TEMP[0].xyzz, CONST[1][57].xyzz > 16: MOV TEMP[3], TEMP[1] > 17: MOV TEMP[4].zw, TEMP[1].wwzw > 18: MUL TEMP[5].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 19: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 20: MUL TEMP[6].xy, IMM[0].xyyy, CONST[2][24].xyyy > 21: FMA TEMP[1].xy, TEMP[1].xyyy, TEMP[6].xyyy, TEMP[5].xyyy > 22: MOV TEMP[4].xy, TEMP[1].xyxx > 23: FMA TEMP[1].xy, TEMP[1].xyyy, 
CONST[2][21].zwww, TEMP[2].xyyy > 24: DP3 TEMP[2].x, CONST[1][12].xyzz, TEMP[0].xyzz > 25: DP3 TEMP[5].x, CONST[1][13].xyzz, TEMP[0].xyzz > 26: MOV TEMP[2].y, TEMP[5].xxxx > 27: DP3 TEMP[0].x, CONST[1][14].xyzz, TEMP[0].xyzz > 28: MOV TEMP[2].z, TEMP[0].xxxx > 29: MOV OUT[3], TEMP[2] > 30: MOV OUT[2], TEMP[1] > 31: MOV OUT[1], TEMP[4] > 32: MOV OUT[0], TEMP[3] > 33: END >radeonsi: Compiling shader 72 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 192) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 196) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 200) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %50 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %51 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %52 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %53 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %54 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %55 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %56 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 912) > %57 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %58 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 > %61 = call float @llvm.SI.load.const(<16 x i8> %60, i32 336) > %62 = call float @llvm.SI.load.const(<16 x i8> %60, i32 340) > %63 = call float @llvm.SI.load.const(<16 x i8> %60, i32 344) > %64 = call float @llvm.SI.load.const(<16 x i8> %60, i32 348) > %65 = call float @llvm.SI.load.const(<16 x i8> %60, i32 384) > %66 = call float @llvm.SI.load.const(<16 x i8> %60, i32 388) > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %13) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = fmul float %70, %25 > %74 = fmul float %71, %26 > %75 = fmul float %72, %27 > %76 = fmul float %44, %73 > %77 = fmul float %45, %74 > %78 = fadd float %76, %77 > %79 = fmul float %46, %75 > %80 = fadd float %78, %79 > %81 = fadd float %80, %47 > %82 = fmul float %48, %73 > %83 = fmul float %49, %74 > %84 = fadd float %82, %83 > %85 = fmul float %50, %75 > %86 = fadd float %84, %85 > %87 = fadd float %86, %51 > %88 = fmul float %52, %73 > %89 = fmul float %53, %74 > %90 = fadd float %88, %89 > %91 = fmul float %54, %75 > %92 = fadd float %90, %91 > %93 = fadd float %92, %55 > %94 = fmul float %28, %81 > %95 = fmul float %29, %87 > %96 = fadd float %94, %95 > %97 = fmul float %30, %93 > %98 = fadd float %96, %97 > %99 = fadd float %98, %31 > %100 = fmul float %32, %81 > %101 = fmul float %33, %87 > %102 = fadd float %100, %101 > %103 = fmul float %34, %93 > %104 = fadd float %102, %103 > %105 = fadd float %104, %35 > %106 = fmul float %36, %81 > %107 = fmul float %37, %87 > %108 = fadd float %106, %107 > %109 = fmul float %38, %93 > %110 = fadd float %108, %109 > %111 = fadd float %110, %39 > %112 = fmul float %40, %81 > %113 = fmul float %41, %87 > %114 = fadd float %112, %113 > %115 = fmul float %42, %93 > %116 = fadd float %114, %115 > %117 = fadd float %116, %43 > %118 = fsub float %56, %81 > %119 = fsub float %57, %87 > %120 = fsub float %58, %93 > %121 = fmul float %117, %65 > %122 = fmul float %117, %66 > %123 = fmul float %117, %61 > %124 = fmul float %117, %62 > %125 = fsub float -0.000000e+00, %66 > %126 = call float @llvm.fma.f32(float %99, float %65, float %121) > %127 = call float @llvm.fma.f32(float %105, float %125, float %122) > %128 = call float @llvm.fma.f32(float %126, float %63, float %123) > %129 = call float @llvm.fma.f32(float %127, float %64, float %124) > %130 = fmul float %16, %118 > %131 = fmul float %17, %119 > %132 = fadd float %131, %130 > %133 = fmul float %18, %120 > %134 = fadd float %132, %133 > %135 = fmul float %19, %118 > %136 = fmul float %20, %119 > %137 = fadd float %136, %135 > %138 = fmul float %21, %120 > %139 = fadd float %137, %138 > %140 = fmul float %22, %118 > %141 = fmul float %23, %119 > %142 = fadd float %141, %140 > %143 = fmul float %24, %120 > %144 = fadd float %142, %143 > %145 = bitcast i32 %11 to float > %146 = insertvalue <{ float, float, float }> undef, float %145, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %126, float %127, float 
%111, float %117) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %128, float %129, float %111, float %117) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %134, float %139, float %144, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %99, float %105, float %111, float %117) > ret <{ float, float, float }> %146 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 3D, FLOAT >DCL SVIEW[4], 3D, FLOAT >DCL SVIEW[5], 3D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL CONST[1][0..18] >DCL TEMP[0..11], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, -1.0000} >IMM[1] UINT32 {0, 192, 208, 224} >IMM[2] FLT32 { 1.0000, 4096.0000, 0.0040, 2.0040} >IMM[3] FLT32 { 0.1250, 0.0000, 0.0000, 0.0000} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] UINT32 {1065353216, 64, 80, 96} >IMM[6] UINT32 {288, 256, 0, 0} > 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz > 1: RSQ TEMP[1].x, TEMP[0].xxxx > 2: MUL TEMP[0].xyz, TEMP[1].xxxx, IN[2].xyzz > 3: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 4: SSG TEMP[2].xy, IN[0].xyyy > 5: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 6: RCP TEMP[3].xy, IN[0].wwww > 7: MUL TEMP[3].xy, IN[0].xyyy, TEMP[3].xyyy > 8: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 9: MOV TEMP[2].xy, TEMP[1].xyyy > 10: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 11: FMA TEMP[2].xyz, TEMP[2].xyzz, IMM[0].zzzz, IMM[0].wwww > 12: DP3 TEMP[3].x, CONST[1][12].xyzz, TEMP[2].xyzz > 13: DP3 TEMP[4].x, CONST[1][13].xyzz, TEMP[2].xyzz > 14: MOV TEMP[3].y, TEMP[4].xxxx > 15: DP3 TEMP[4].x, CONST[1][14].xyzz, TEMP[2].xyzz > 16: MOV TEMP[3].z, TEMP[4].xxxx > 17: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz > 18: RSQ TEMP[4].x, TEMP[4].xxxx > 19: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 20: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[2].xyzz > 21: ADD TEMP[3].xyz, TEMP[2].xyzz, TEMP[2].xyzz > 22: ABS TEMP[5].xyz, TEMP[2].xyzz > 23: MUL TEMP[2].xyz, TEMP[5].xyzz, TEMP[2].xyzz > 24: FMA TEMP[0].xyz, -TEMP[3].xyzz, TEMP[4].xxxx, TEMP[0].xyzz > 25: ABS TEMP[4].xyz, TEMP[0].xyzz > 26: ADD TEMP[3].xyz, -TEMP[4].xyzz, IMM[2].xxxx > 27: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[3].xyzz > 28: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[3].xyzz > 29: MOV TEMP[4].xy, TEMP[1].xyyy > 30: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D > 31: ADD TEMP[5].x, -TEMP[4].zzzz, IMM[2].xxxx > 32: FMA TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz, TEMP[4].zzzz > 33: ABS TEMP[6].xyz, TEMP[0].xyzz > 34: LG2 TEMP[7].x, TEMP[6].xxxx > 35: LG2 TEMP[7].y, 
TEMP[6].yyyy > 36: LG2 TEMP[7].z, TEMP[6].zzzz > 37: MUL TEMP[6].x, TEMP[4].xxxx, TEMP[4].xxxx > 38: FMA TEMP[6].xy, TEMP[6].xxxx, IMM[2].yyyy, IMM[2].zwww > 39: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[6].xxxx > 40: MUL TEMP[6].x, TEMP[6].yyyy, IMM[3].xxxx > 41: EX2 TEMP[8].x, TEMP[7].xxxx > 42: EX2 TEMP[8].y, TEMP[7].yyyy > 43: EX2 TEMP[8].z, TEMP[7].zzzz > 44: MIN TEMP[7].xyz, TEMP[8].xyzz, IMM[2].xxxx > 45: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[7].xyzz > 46: ABS TEMP[7].xyz, TEMP[0].xyzz > 47: MUL TEMP[3].xyz, TEMP[7].xyzz, TEMP[3].xyzz > 48: FSGE TEMP[7].xyz, TEMP[0].xyzz, IMM[0].xxxx > 49: AND TEMP[7].xyz, TEMP[7].xyzz, IMM[4].xxxx > 50: INEG TEMP[7].xyz, TEMP[7].xyzz > 51: AND TEMP[8].xyz, TEMP[7].xyzz, IMM[5].xxxx > 52: USNE TEMP[9].x, TEMP[7].xxxx, IMM[1].xxxx > 53: UIF TEMP[9].xxxx :0 > 54: MOV TEMP[9].x, IMM[1].xxxx > 55: ELSE :0 > 56: MOV TEMP[9].x, IMM[5].xxxx > 57: ENDIF > 58: MOV TEMP[9].x, TEMP[9].xxxx > 59: USNE TEMP[10].x, TEMP[7].yyyy, IMM[1].xxxx > 60: UIF TEMP[10].xxxx :0 > 61: MOV TEMP[10].x, IMM[1].xxxx > 62: ELSE :0 > 63: MOV TEMP[10].x, IMM[5].xxxx > 64: ENDIF > 65: MOV TEMP[9].y, TEMP[10].xxxx > 66: USNE TEMP[7].x, TEMP[7].zzzz, IMM[1].xxxx > 67: UIF TEMP[7].xxxx :0 > 68: MOV TEMP[7].x, IMM[1].xxxx > 69: ELSE :0 > 70: MOV TEMP[7].x, IMM[5].xxxx > 71: ENDIF > 72: MOV TEMP[9].z, TEMP[7].xxxx > 73: FSEQ TEMP[7].xy, IN[0].wwww, IMM[0].xxxx > 74: SSG TEMP[10].xy, IN[1].xyyy > 75: MUL TEMP[10].xy, IMM[0].yyyy, TEMP[10].xyyy > 76: RCP TEMP[11].xy, IN[0].wwww > 77: MUL TEMP[11].xy, IN[1].xyyy, TEMP[11].xyyy > 78: UCMP TEMP[7].xy, TEMP[7].xyyy, TEMP[10].xyyy, TEMP[11].xyyy > 79: MOV TEMP[10].xy, TEMP[1].xyyy > 80: TEX TEMP[10].x, TEMP[10], SAMP[2], 2D > 81: MOV TEMP[5].z, TEMP[10].xxxx > 82: MUL TEMP[5].xy, TEMP[7].xyyy, TEMP[10].xxxx > 83: MOV TEMP[5].w, IMM[2].xxxx > 84: DP4 TEMP[7].x, CONST[1][4], TEMP[5] > 85: DP4 TEMP[10].x, CONST[1][5], TEMP[5] > 86: MOV TEMP[7].y, TEMP[10].xxxx > 87: DP4 TEMP[10].x, CONST[1][6], TEMP[5] > 88: MOV TEMP[7].z, TEMP[10].xxxx > 89: MOV TEMP[10].xyz, TEMP[7].xyzz > 90: MOV TEMP[10].w, IMM[0].xxxx > 91: TXL TEMP[10].xyz, TEMP[10], SAMP[3], 3D > 92: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[10].xyzz > 93: MOV TEMP[9].xyz, TEMP[7].xyzz > 94: MOV TEMP[9].w, IMM[0].xxxx > 95: TXL TEMP[9].xyz, TEMP[9], SAMP[4], 3D > 96: MOV TEMP[11].xyz, TEMP[7].xyzz > 97: MOV TEMP[11].w, IMM[0].xxxx > 98: TXL TEMP[11], TEMP[11], SAMP[5], 3D > 99: MUL TEMP[7].xyz, TEMP[11].wwww, TEMP[11].xyzz >100: FMA TEMP[0].xyz, TEMP[9].xyzz, TEMP[8].xyzz, TEMP[0].xyzz >101: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[3].xyzz >102: MUL TEMP[0].x, TEMP[6].xxxx, TEMP[8].xxxx >103: MOV TEMP[6].xy, TEMP[1].xyyy >104: MOV TEMP[6].w, IMM[0].xxxx >105: TXL TEMP[6].xy, TEMP[6], SAMP[6], 2D >106: MOV TEMP[1].xy, TEMP[1].xyyy >107: TEX TEMP[1], TEMP[1], SAMP[7], 2D >108: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[6].xxxx >109: MUL TEMP[3].xyz, TEMP[7].xyzz, CONST[1][18].xyzz >110: MUL TEMP[7].xyz, TEMP[7].xyzz, CONST[1][16].xyzz >111: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3].xyzz >112: MUL TEMP[0].xyz, TEMP[4].yyyy, TEMP[0].xyzz >113: FSGE TEMP[4].xyz, IMM[0].xxxx, TEMP[2].xyzz >114: AND TEMP[4].xyz, TEMP[4].xyzz, IMM[4].xxxx >115: INEG TEMP[4].xyz, TEMP[4].xyzz >116: USNE TEMP[8].x, TEMP[4].xxxx, IMM[1].xxxx >117: UIF TEMP[8].xxxx :0 >118: MOV TEMP[8].x, IMM[1].xxxx >119: ELSE :0 >120: MOV TEMP[8].x, IMM[5].xxxx >121: ENDIF >122: MOV TEMP[8].x, TEMP[8].xxxx >123: USNE TEMP[11].x, TEMP[4].yyyy, IMM[1].xxxx >124: UIF TEMP[11].xxxx :0 >125: MOV TEMP[11].x, IMM[1].xxxx >126: ELSE :0 >127: MOV TEMP[11].x, IMM[5].xxxx 
>128: ENDIF >129: MOV TEMP[8].y, TEMP[11].xxxx >130: USNE TEMP[11].x, TEMP[4].zzzz, IMM[1].xxxx >131: UIF TEMP[11].xxxx :0 >132: MOV TEMP[11].x, IMM[1].xxxx >133: ELSE :0 >134: MOV TEMP[11].x, IMM[5].xxxx >135: ENDIF >136: MOV TEMP[8].z, TEMP[11].xxxx >137: AND TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >138: MUL TEMP[5].xyz, TEMP[10].xyzz, TEMP[8].xyzz >139: FMA TEMP[3].xyz, TEMP[9].xyzz, TEMP[4].xyzz, TEMP[5].xyzz >140: ABS TEMP[4].xyz, TEMP[2].xyzz >141: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[4].xyzz >142: MUL TEMP[3].x, TEMP[6].yyyy, TEMP[3].xxxx >143: MUL TEMP[2].xyz, TEMP[7].xyzz, TEMP[3].xxxx >144: FMA TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz >145: MUL TEMP[0].xyz, TEMP[1].wwww, TEMP[0].xyzz >146: MOV TEMP[0].w, IMM[2].xxxx >147: MOV OUT[0], TEMP[0] >148: END >radeonsi: Compiling shader 73 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* 
> %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 7 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 11 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 > %81 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %82 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %81, i64 0, i64 15 > %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 > %84 = extractelement <8 x i32> %80, i32 7 > %85 = extractelement <4 x i32> %83, i32 0 > %86 = and i32 %85, %84 > %87 = insertelement <4 x i32> %83, i32 %86, i32 0 > %88 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 > %90 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %91 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %90, i64 0, i64 19 > %92 = load <4 x i32>, <4 x i32> addrspace(2)* %91, align 16, !tbaa !0 > %93 = extractelement <8 x i32> %89, i32 7 > %94 = extractelement <4 x i32> %92, i32 0 > %95 = and i32 %94, %93 > %96 = insertelement <4 x i32> %92, i32 %95, i32 0 > %97 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %98 = load <8 x i32>, <8 x i32> addrspace(2)* %97, align 32, !tbaa !0 > %99 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %100 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %99, i64 0, i64 23 > %101 = load <4 x i32>, <4 x i32> addrspace(2)* %100, align 16, !tbaa !0 > %102 = extractelement <8 x i32> %98, i32 7 > %103 = extractelement <4 x i32> %101, i32 0 > %104 = and i32 %103, %102 > %105 = insertelement <4 x i32> %101, i32 %104, i32 0 > %106 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %107 = load <8 x i32>, <8 x i32> addrspace(2)* %106, align 32, !tbaa !0 > %108 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %109 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %108, i64 0, i64 27 > %110 = load <4 x i32>, <4 x i32> 
addrspace(2)* %109, align 16, !tbaa !0 > %111 = extractelement <8 x i32> %107, i32 7 > %112 = extractelement <4 x i32> %110, i32 0 > %113 = and i32 %112, %111 > %114 = insertelement <4 x i32> %110, i32 %113, i32 0 > %115 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %116 = load <8 x i32>, <8 x i32> addrspace(2)* %115, align 32, !tbaa !0 > %117 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %118 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %117, i64 0, i64 31 > %119 = load <4 x i32>, <4 x i32> addrspace(2)* %118, align 16, !tbaa !0 > %120 = extractelement <8 x i32> %116, i32 7 > %121 = extractelement <4 x i32> %119, i32 0 > %122 = and i32 %121, %120 > %123 = insertelement <4 x i32> %119, i32 %122, i32 0 > %124 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %125 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %126 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %127 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %128 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %129 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %130 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %131 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %132 = fmul float %129, %129 > %133 = fmul float %130, %130 > %134 = fadd float %133, %132 > %135 = fmul float %131, %131 > %136 = fadd float %134, %135 > %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) > %138 = fmul float %137, %129 > %139 = fmul float %137, %130 > %140 = fmul float %137, %131 > %141 = fcmp oeq float %126, 0.000000e+00 > %142 = fcmp oeq float %126, 0.000000e+00 > %143 = fcmp ogt float %124, 0.000000e+00 > %144 = select i1 %143, float 1.000000e+00, float %124 > %145 = fcmp oge float %144, 0.000000e+00 > %146 = fcmp ogt float %125, 0.000000e+00 > %147 = select i1 %146, float 1.000000e+00, float %125 > %148 = fcmp oge float %147, 0.000000e+00 > %.op = fmul float %144, 0x4600000000000000 > %149 = select i1 %145, float %.op, float 0xC600000000000000 > %.op65 = fmul float %147, 0x4600000000000000 > %150 = select i1 %148, float %.op65, float 0xC600000000000000 > %151 = fdiv float 1.000000e+00, %126 > %152 = fmul float %124, %151 > %153 = fmul float %125, %151 > %154 = select i1 %141, float %149, float %152 > %155 = select i1 %142, float %150, float %153 > %156 = bitcast float %154 to i32 > %157 = bitcast float %155 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> %160, i32 0 > %162 = extractelement <4 x float> %160, i32 1 > %163 = extractelement <4 x float> %160, i32 2 > %164 = call float @llvm.fma.f32(float %161, float 2.000000e+00, float -1.000000e+00) > %165 = call float @llvm.fma.f32(float %162, float 2.000000e+00, float -1.000000e+00) > %166 = call float @llvm.fma.f32(float %163, float 2.000000e+00, float -1.000000e+00) > %167 = fmul float %37, %164 > %168 = fmul float %38, %165 > %169 = fadd float %168, %167 > %170 = fmul float %39, %166 > %171 = fadd float %169, %170 > %172 = fmul float %40, %164 > %173 = fmul float %41, %165 > %174 = fadd float %173, %172 > %175 = fmul float %42, %166 > %176 = fadd float %174, %175 > %177 = 
fmul float %43, %164 > %178 = fmul float %44, %165 > %179 = fadd float %178, %177 > %180 = fmul float %45, %166 > %181 = fadd float %179, %180 > %182 = fmul float %171, %171 > %183 = fmul float %176, %176 > %184 = fadd float %183, %182 > %185 = fmul float %181, %181 > %186 = fadd float %184, %185 > %187 = call float @llvm.AMDGPU.rsq.clamped.f32(float %186) > %188 = fmul float %187, %171 > %189 = fmul float %187, %176 > %190 = fmul float %187, %181 > %191 = fmul float %138, %188 > %192 = fmul float %139, %189 > %193 = fadd float %192, %191 > %194 = fmul float %140, %190 > %195 = fadd float %193, %194 > %196 = fadd float %188, %188 > %197 = fadd float %189, %189 > %198 = fadd float %190, %190 > %199 = call float @llvm.fabs.f32(float %188) > %200 = call float @llvm.fabs.f32(float %189) > %201 = call float @llvm.fabs.f32(float %190) > %202 = fmul float %199, %188 > %203 = fmul float %200, %189 > %204 = fmul float %201, %190 > %205 = fsub float -0.000000e+00, %196 > %206 = call float @llvm.fma.f32(float %205, float %195, float %138) > %207 = fsub float -0.000000e+00, %197 > %208 = call float @llvm.fma.f32(float %207, float %195, float %139) > %209 = fsub float -0.000000e+00, %198 > %210 = call float @llvm.fma.f32(float %209, float %195, float %140) > %211 = call float @llvm.fabs.f32(float %206) > %212 = call float @llvm.fabs.f32(float %208) > %213 = call float @llvm.fabs.f32(float %210) > %214 = fsub float 1.000000e+00, %211 > %215 = fsub float 1.000000e+00, %212 > %216 = fsub float 1.000000e+00, %213 > %217 = fmul float %214, %214 > %218 = fmul float %215, %215 > %219 = fmul float %216, %216 > %220 = fmul float %217, %217 > %221 = fmul float %218, %218 > %222 = fmul float %219, %219 > %223 = bitcast float %154 to i32 > %224 = bitcast float %155 to i32 > %225 = insertelement <2 x i32> undef, i32 %223, i32 0 > %226 = insertelement <2 x i32> %225, i32 %224, i32 1 > %227 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %226, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %228 = extractelement <4 x float> %227, i32 0 > %229 = extractelement <4 x float> %227, i32 1 > %230 = extractelement <4 x float> %227, i32 2 > %231 = fsub float 1.000000e+00, %230 > %232 = call float @llvm.fma.f32(float %231, float %220, float %230) > %233 = call float @llvm.fma.f32(float %231, float %221, float %230) > %234 = call float @llvm.fma.f32(float %231, float %222, float %230) > %235 = call float @llvm.fabs.f32(float %206) > %236 = call float @llvm.fabs.f32(float %208) > %237 = call float @llvm.fabs.f32(float %210) > %238 = call float @llvm.log2.f32(float %235) > %239 = call float @llvm.log2.f32(float %236) > %240 = call float @llvm.log2.f32(float %237) > %241 = fmul float %228, %228 > %242 = call float @llvm.fma.f32(float %241, float 4.096000e+03, float 0x3F70624DE0000000) > %243 = call float @llvm.fma.f32(float %241, float 4.096000e+03, float 0x4000083120000000) > %244 = fmul float %238, %242 > %245 = fmul float %239, %242 > %246 = fmul float %240, %242 > %247 = fmul float %243, 1.250000e-01 > %248 = call float @llvm.exp2.f32(float %244) > %249 = call float @llvm.exp2.f32(float %245) > %250 = call float @llvm.exp2.f32(float %246) > %251 = call float @llvm.minnum.f32(float %248, float 1.000000e+00) > %252 = call float @llvm.minnum.f32(float %249, float 1.000000e+00) > %253 = call float @llvm.minnum.f32(float %250, float 1.000000e+00) > %254 = fmul float %232, %251 > %255 = fmul float %233, %252 > %256 = fmul float %234, %253 > %257 = call float @llvm.fabs.f32(float 
%206) > %258 = call float @llvm.fabs.f32(float %208) > %259 = call float @llvm.fabs.f32(float %210) > %260 = fmul float %257, %254 > %261 = fmul float %258, %255 > %262 = fmul float %259, %256 > %263 = fcmp oge float %206, 0.000000e+00 > %264 = fcmp oge float %208, 0.000000e+00 > %265 = fcmp oge float %210, 0.000000e+00 > %266 = select i1 %263, float 1.000000e+00, float 0.000000e+00 > %267 = select i1 %264, float 1.000000e+00, float 0.000000e+00 > %268 = select i1 %265, float 1.000000e+00, float 0.000000e+00 > %. = select i1 %263, float 0.000000e+00, float 1.000000e+00 > %temp40.0 = select i1 %264, float 0.000000e+00, float 1.000000e+00 > %.63 = select i1 %265, float 0.000000e+00, float 1.000000e+00 > %269 = fcmp oeq float %126, 0.000000e+00 > %270 = fcmp oeq float %126, 0.000000e+00 > %271 = fcmp ogt float %127, 0.000000e+00 > %272 = select i1 %271, float 1.000000e+00, float %127 > %273 = fcmp oge float %272, 0.000000e+00 > %274 = fcmp ogt float %128, 0.000000e+00 > %275 = select i1 %274, float 1.000000e+00, float %128 > %276 = fcmp oge float %275, 0.000000e+00 > %.op66 = fmul float %272, 0x4600000000000000 > %277 = select i1 %273, float %.op66, float 0xC600000000000000 > %.op67 = fmul float %275, 0x4600000000000000 > %278 = select i1 %276, float %.op67, float 0xC600000000000000 > %279 = fdiv float 1.000000e+00, %126 > %280 = fmul float %127, %279 > %281 = fmul float %128, %279 > %282 = select i1 %269, float %277, float %280 > %283 = select i1 %270, float %278, float %281 > %284 = bitcast float %154 to i32 > %285 = bitcast float %155 to i32 > %286 = insertelement <2 x i32> undef, i32 %284, i32 0 > %287 = insertelement <2 x i32> %286, i32 %285, i32 1 > %288 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %287, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %289 = extractelement <4 x float> %288, i32 0 > %290 = fmul float %282, %289 > %291 = fmul float %283, %289 > %292 = fmul float %25, %290 > %293 = fmul float %26, %291 > %294 = fadd float %292, %293 > %295 = fmul float %27, %289 > %296 = fadd float %294, %295 > %297 = fadd float %296, %28 > %298 = fmul float %29, %290 > %299 = fmul float %30, %291 > %300 = fadd float %298, %299 > %301 = fmul float %31, %289 > %302 = fadd float %300, %301 > %303 = fadd float %302, %32 > %304 = fmul float %33, %290 > %305 = fmul float %34, %291 > %306 = fadd float %304, %305 > %307 = fmul float %35, %289 > %308 = fadd float %306, %307 > %309 = fadd float %308, %36 > %310 = bitcast float %297 to i32 > %311 = bitcast float %303 to i32 > %312 = bitcast float %309 to i32 > %313 = insertelement <4 x i32> undef, i32 %310, i32 0 > %314 = insertelement <4 x i32> %313, i32 %311, i32 1 > %315 = insertelement <4 x i32> %314, i32 %312, i32 2 > %316 = insertelement <4 x i32> %315, i32 0, i32 3 > %317 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %316, <8 x i32> %80, <4 x i32> %87, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %318 = extractelement <4 x float> %317, i32 0 > %319 = extractelement <4 x float> %317, i32 1 > %320 = extractelement <4 x float> %317, i32 2 > %321 = fmul float %., %318 > %322 = fmul float %temp40.0, %319 > %323 = fmul float %.63, %320 > %324 = bitcast float %297 to i32 > %325 = bitcast float %303 to i32 > %326 = bitcast float %309 to i32 > %327 = insertelement <4 x i32> undef, i32 %324, i32 0 > %328 = insertelement <4 x i32> %327, i32 %325, i32 1 > %329 = insertelement <4 x i32> %328, i32 %326, i32 2 > %330 = insertelement <4 x i32> %329, i32 0, i32 3 > %331 = call <4 
x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %330, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %332 = extractelement <4 x float> %331, i32 0 > %333 = extractelement <4 x float> %331, i32 1 > %334 = extractelement <4 x float> %331, i32 2 > %335 = bitcast float %297 to i32 > %336 = bitcast float %303 to i32 > %337 = bitcast float %309 to i32 > %338 = insertelement <4 x i32> undef, i32 %335, i32 0 > %339 = insertelement <4 x i32> %338, i32 %336, i32 1 > %340 = insertelement <4 x i32> %339, i32 %337, i32 2 > %341 = insertelement <4 x i32> %340, i32 0, i32 3 > %342 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %341, <8 x i32> %98, <4 x i32> %105, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %343 = extractelement <4 x float> %342, i32 0 > %344 = extractelement <4 x float> %342, i32 1 > %345 = extractelement <4 x float> %342, i32 2 > %346 = extractelement <4 x float> %342, i32 3 > %347 = fmul float %346, %343 > %348 = fmul float %346, %344 > %349 = fmul float %346, %345 > %350 = call float @llvm.fma.f32(float %332, float %266, float %321) > %351 = call float @llvm.fma.f32(float %333, float %267, float %322) > %352 = call float @llvm.fma.f32(float %334, float %268, float %323) > %353 = fmul float %350, %260 > %354 = fmul float %351, %261 > %355 = fadd float %354, %353 > %356 = fmul float %352, %262 > %357 = fadd float %355, %356 > %358 = fmul float %247, %357 > %359 = bitcast float %154 to i32 > %360 = bitcast float %155 to i32 > %361 = insertelement <4 x i32> undef, i32 %359, i32 0 > %362 = insertelement <4 x i32> %361, i32 %360, i32 1 > %363 = insertelement <4 x i32> %362, i32 0, i32 2 > %364 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %363, <8 x i32> %107, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %365 = extractelement <4 x float> %364, i32 0 > %366 = extractelement <4 x float> %364, i32 1 > %367 = bitcast float %154 to i32 > %368 = bitcast float %155 to i32 > %369 = insertelement <2 x i32> undef, i32 %367, i32 0 > %370 = insertelement <2 x i32> %369, i32 %368, i32 1 > %371 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %370, <8 x i32> %116, <4 x i32> %123, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %372 = extractelement <4 x float> %371, i32 0 > %373 = extractelement <4 x float> %371, i32 1 > %374 = extractelement <4 x float> %371, i32 2 > %375 = extractelement <4 x float> %371, i32 3 > %376 = fmul float %358, %365 > %377 = fmul float %347, %49 > %378 = fmul float %348, %50 > %379 = fmul float %349, %51 > %380 = fmul float %347, %46 > %381 = fmul float %348, %47 > %382 = fmul float %349, %48 > %383 = fmul float %376, %377 > %384 = fmul float %376, %378 > %385 = fmul float %376, %379 > %386 = fmul float %229, %383 > %387 = fmul float %229, %384 > %388 = fmul float %229, %385 > %389 = fcmp ole float %202, 0.000000e+00 > %390 = fcmp ole float %203, 0.000000e+00 > %391 = fcmp ole float %204, 0.000000e+00 > %temp32.0 = select i1 %389, float 0.000000e+00, float 1.000000e+00 > %.64 = select i1 %390, float 0.000000e+00, float 1.000000e+00 > %temp44.1 = select i1 %391, float 0.000000e+00, float 1.000000e+00 > %392 = select i1 %389, float 1.000000e+00, float 0.000000e+00 > %393 = select i1 %390, float 1.000000e+00, float 0.000000e+00 > %394 = select i1 %391, float 1.000000e+00, float 0.000000e+00 > %395 = fmul float %318, %temp32.0 > %396 = fmul float %319, %.64 > %397 = fmul float %320, %temp44.1 > %398 = call float @llvm.fma.f32(float %332, float %392, 
float %395) > %399 = call float @llvm.fma.f32(float %333, float %393, float %396) > %400 = call float @llvm.fma.f32(float %334, float %394, float %397) > %401 = call float @llvm.fabs.f32(float %202) > %402 = call float @llvm.fabs.f32(float %203) > %403 = call float @llvm.fabs.f32(float %204) > %404 = fmul float %398, %401 > %405 = fmul float %399, %402 > %406 = fadd float %405, %404 > %407 = fmul float %400, %403 > %408 = fadd float %406, %407 > %409 = fmul float %366, %408 > %410 = fmul float %380, %409 > %411 = fmul float %381, %409 > %412 = fmul float %382, %409 > %413 = call float @llvm.fma.f32(float %410, float %372, float %386) > %414 = call float @llvm.fma.f32(float %411, float %373, float %387) > %415 = call float @llvm.fma.f32(float %412, float %374, float %388) > %416 = fmul float %375, %413 > %417 = fmul float %375, %414 > %418 = fmul float %375, %415 > %419 = bitcast float %5 to i32 > %420 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %419, 10 > %421 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %420, float %416, 11 > %422 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %421, float %417, 12 > %423 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %422, float %418, 13 > %424 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %423, float 1.000000e+00, 14 > %425 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %424, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %425 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > 
as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..55] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 336, 848, 864} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {880, 736, 752, 768} >IMM[3] UINT32 {784, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[1][21].xyzz > 1: MOV TEMP[0].w, IMM[1].xxxx > 2: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][55], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[1].w, IMM[1].xxxx > 8: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 9: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 10: MOV TEMP[0].y, TEMP[2].xxxx > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 12: MOV TEMP[0].z, TEMP[2].xxxx > 13: DP4 TEMP[1].x, CONST[1][49], TEMP[1] > 14: MOV TEMP[0].w, TEMP[1].xxxx > 15: MOV OUT[0], TEMP[0] > 16: END >radeonsi: Compiling shader 74 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %47 = getelementptr [16 x <16 x i8>], [16 x 
<16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %16 > %54 = fmul float %51, %17 > %55 = fmul float %52, %18 > %56 = fmul float %35, %53 > %57 = fmul float %36, %54 > %58 = fadd float %56, %57 > %59 = fmul float %37, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %38 > %62 = fmul float %39, %53 > %63 = fmul float %40, %54 > %64 = fadd float %62, %63 > %65 = fmul float %41, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %42 > %68 = fmul float %43, %53 > %69 = fmul float %44, %54 > %70 = fadd float %68, %69 > %71 = fmul float %45, %55 > %72 = fadd float %70, %71 > %73 = fadd float %72, %46 > %74 = fmul float %19, %61 > %75 = fmul float %20, %67 > %76 = fadd float %74, %75 > %77 = fmul float %21, %73 > %78 = fadd float %76, %77 > %79 = fadd float %78, %22 > %80 = fmul float %23, %61 > %81 = fmul float %24, %67 > %82 = fadd float %80, %81 > %83 = fmul float %25, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %26 > %86 = fmul float %27, %61 > %87 = fmul float %28, %67 > %88 = fadd float %86, %87 > %89 = fmul float %29, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %30 > %92 = fmul float %31, %61 > %93 = fmul float %32, %67 > %94 = fadd float %92, %93 > %95 = fmul float %33, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %34 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SAMP[8] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 3D, FLOAT >DCL SVIEW[2], 3D, FLOAT >DCL SVIEW[3], 3D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL SVIEW[8], CUBE, FLOAT >DCL CONST[1][0..30] >DCL TEMP[0..15], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 64, 80, 96} >IMM[2] UINT32 {256, 192, 208, 224} >IMM[3] FLT32 { -1.0000, 0.5000, 0.5098, 0.1500} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] UINT32 {1065353216, 288, 480, 0} >IMM[6] FLT32 { 8.0000, 10.0000, 0.0500, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, 
TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: MOV TEMP[3].w, IMM[0].zzzz > 17: DP4 TEMP[1].x, CONST[1][4], TEMP[3] > 18: DP4 TEMP[2].x, CONST[1][5], TEMP[3] > 19: MOV TEMP[1].y, TEMP[2].xxxx > 20: DP4 TEMP[2].x, CONST[1][6], TEMP[3] > 21: MOV TEMP[1].z, TEMP[2].xxxx > 22: MOV TEMP[2].xyz, TEMP[1].xyzz > 23: MOV TEMP[2].w, IMM[0].xxxx > 24: TXL TEMP[2], TEMP[2], SAMP[1], 3D > 25: MUL TEMP[3].xyz, TEMP[2].wwww, TEMP[2].xyzz > 26: MUL TEMP[2].xyz, TEMP[3].xyzz, CONST[1][16].xyzz > 27: MOV TEMP[4].xyz, TEMP[1].xyzz > 28: MOV TEMP[4].w, IMM[0].xxxx > 29: TXL TEMP[4].xyz, TEMP[4], SAMP[2], 3D > 30: MOV TEMP[5].xyz, TEMP[1].xyzz > 31: MOV TEMP[5].w, IMM[0].xxxx > 32: TXL TEMP[5].xyz, TEMP[5], SAMP[3], 3D > 33: MOV TEMP[6].xy, TEMP[0].xyyy > 34: TEX TEMP[6], TEMP[6], SAMP[4], 2D > 35: FMA TEMP[7].xyz, TEMP[6].xyzz, IMM[0].wwww, IMM[3].xxxx > 36: DP3 TEMP[8].x, CONST[1][12].xyzz, TEMP[7].xyzz > 37: DP3 TEMP[9].x, CONST[1][13].xyzz, TEMP[7].xyzz > 38: MOV TEMP[8].y, TEMP[9].xxxx > 39: DP3 TEMP[9].x, CONST[1][14].xyzz, TEMP[7].xyzz > 40: MOV TEMP[8].z, TEMP[9].xxxx > 41: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz > 42: RSQ TEMP[9].x, TEMP[9].xxxx > 43: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[8].xyzz > 44: MOV TEMP[9].xy, TEMP[0].xyyy > 45: MOV TEMP[9].w, IMM[0].xxxx > 46: TXL TEMP[9].xy, TEMP[9], SAMP[5], 2D > 47: ABS TEMP[10].xyz, TEMP[7].xyzz > 48: MUL TEMP[8].xyz, TEMP[10].xyzz, TEMP[7].xyzz > 49: FSGE TEMP[10].xyz, IMM[0].xxxx, TEMP[8].xyzz > 50: AND TEMP[10].xyz, TEMP[10].xyzz, IMM[4].xxxx > 51: INEG TEMP[10].xyz, TEMP[10].xyzz > 52: AND TEMP[11].xyz, TEMP[10].xyzz, IMM[5].xxxx > 53: MOV TEMP[12].xyz, TEMP[11].xyzx > 54: USNE TEMP[13].x, TEMP[10].xxxx, IMM[1].xxxx > 55: UIF TEMP[13].xxxx :0 > 56: MOV TEMP[13].x, IMM[1].xxxx > 57: ELSE :0 > 58: MOV TEMP[13].x, IMM[5].xxxx > 59: ENDIF > 60: MOV TEMP[13].x, TEMP[13].xxxx > 61: USNE TEMP[14].x, TEMP[10].yyyy, IMM[1].xxxx > 62: UIF TEMP[14].xxxx :0 > 63: MOV TEMP[14].x, IMM[1].xxxx > 64: ELSE :0 > 65: MOV TEMP[14].x, IMM[5].xxxx > 66: ENDIF > 67: MOV TEMP[13].y, TEMP[14].xxxx > 68: USNE TEMP[14].x, TEMP[10].zzzz, IMM[1].xxxx > 69: UIF TEMP[14].xxxx :0 > 70: MOV TEMP[14].x, IMM[1].xxxx > 71: ELSE :0 > 72: MOV TEMP[14].x, IMM[5].xxxx > 73: ENDIF > 74: MOV TEMP[13].z, TEMP[14].xxxx > 75: MUL TEMP[10].xyz, TEMP[4].xyzz, TEMP[13].xyzz > 76: FMA TEMP[10].xyz, TEMP[5].xyzz, TEMP[11].xyzz, TEMP[10].xyzz > 77: ABS TEMP[8].xyz, TEMP[8].xyzz > 78: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[8].xyzz > 79: MUL TEMP[8].x, TEMP[9].yyyy, TEMP[8].xxxx > 80: MOV TEMP[0].w, TEMP[8].xxxx > 81: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx > 82: MOV TEMP[8].xy, TEMP[0].xyyy > 83: TEX TEMP[8], TEMP[8], SAMP[6], 2D > 84: MOV TEMP[10].xy, TEMP[0].xyyy > 85: TEX TEMP[10], TEMP[10], SAMP[7], 2D > 86: FSLT TEMP[11].x, IMM[0].xxxx, TEMP[10].wwww > 87: AND TEMP[11].x, TEMP[11].xxxx, IMM[4].xxxx > 88: INEG TEMP[11].x, TEMP[11].xxxx > 89: MOV TEMP[0].x, TEMP[11].xxxx > 90: USNE TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx > 91: UIF TEMP[11].xxxx :0 > 92: MIN TEMP[11].x, TEMP[10].wwww, IMM[3].yyyy > 93: MOV TEMP[0].x, TEMP[11].xxxx > 94: FSLT TEMP[6].x, TEMP[6].wwww, IMM[3].zzzz > 
95: AND TEMP[6].x, TEMP[6].xxxx, IMM[4].xxxx > 96: INEG TEMP[6].x, TEMP[6].xxxx > 97: MUL TEMP[12].xyz, TEMP[2].xyzz, TEMP[8].xyzz > 98: ADD TEMP[12].xyz, TEMP[12].xyzz, TEMP[12].xyzz > 99: MAX TEMP[13].x, TEMP[8].zzzz, TEMP[8].yyyy >100: MOV TEMP[0].w, TEMP[13].xxxx >101: MAX TEMP[13].x, TEMP[13].xxxx, TEMP[8].xxxx >102: MOV TEMP[0].w, TEMP[13].xxxx >103: FSEQ TEMP[14].xyz, TEMP[13].xxxx, IMM[0].xxxx >104: SSG TEMP[15].xyz, TEMP[8].xyzz >105: MUL TEMP[15].xyz, IMM[0].yyyy, TEMP[15].xyzz >106: RCP TEMP[13].xyz, TEMP[13].xxxx >107: MUL TEMP[13].xyz, TEMP[8].xyzz, TEMP[13].xyzz >108: UCMP TEMP[13].xyz, TEMP[14].xyzz, TEMP[15].xyzz, TEMP[13].xyzz >109: MOV_SAT TEMP[13].xyz, TEMP[13].xyzz >110: MOV_SAT TEMP[14].xyz, TEMP[2].xyzz >111: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[13].xyzz >112: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz >113: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[3].wwww >114: USNE TEMP[14].x, TEMP[6].xxxx, IMM[1].xxxx >115: UIF TEMP[14].xxxx :0 >116: MOV TEMP[14].x, TEMP[12].xxxx >117: ELSE :0 >118: MOV TEMP[14].x, TEMP[13].xxxx >119: ENDIF >120: MOV TEMP[14].x, TEMP[14].xxxx >121: USNE TEMP[15].x, TEMP[6].xxxx, IMM[1].xxxx >122: UIF TEMP[15].xxxx :0 >123: MOV TEMP[15].x, TEMP[12].yyyy >124: ELSE :0 >125: MOV TEMP[15].x, TEMP[13].yyyy >126: ENDIF >127: MOV TEMP[14].y, TEMP[15].xxxx >128: USNE TEMP[15].x, TEMP[6].xxxx, IMM[1].xxxx >129: UIF TEMP[15].xxxx :0 >130: MOV TEMP[15].x, TEMP[12].zzzz >131: ELSE :0 >132: MOV TEMP[15].x, TEMP[13].zzzz >133: ENDIF >134: MOV TEMP[14].z, TEMP[15].xxxx >135: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[14].xyzz >136: MOV TEMP[0].xyw, TEMP[11].xyxz >137: ELSE :0 >138: MOV TEMP[0].xyw, IMM[0].xxxx >139: ENDIF >140: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[1][18].xyzz >141: DP3 TEMP[11].x, IN[2].xyzz, IN[2].xyzz >142: RSQ TEMP[11].x, TEMP[11].xxxx >143: MUL TEMP[12].xyz, TEMP[11].xxxx, IN[2].xyzz >144: DP3 TEMP[11].x, TEMP[12].xyzz, TEMP[7].xyzz >145: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[11].xxxx >146: FMA TEMP[7].xyz, -TEMP[7].xyzz, IMM[0].wwww, TEMP[12].xyzz >147: MOV TEMP[13].xyz, -TEMP[7].xyzx >148: ABS TEMP[11].x, TEMP[10].xxxx >149: SQRT TEMP[11].x, TEMP[11].xxxx >150: FMA TEMP[11].x, -TEMP[11].xxxx, IMM[6].xxxx, CONST[1][30].xxxx >151: MOV TEMP[13].xyz, TEMP[13].xyzz >152: MOV TEMP[13].w, TEMP[11].xxxx >153: TXL TEMP[11], TEMP[13], SAMP[8], CUBE >154: MUL TEMP[6].xyz, TEMP[11].wwww, TEMP[11].xyzz >155: FMA TEMP[6].xyz, TEMP[6].xyzz, IMM[6].yyyy, -TEMP[11].xyzz >156: FMA TEMP[6].xyz, TEMP[10].xxxx, TEMP[6].xyzz, TEMP[11].xyzz >157: ADD TEMP[11].x, -TEMP[10].zzzz, IMM[0].zzzz >158: DP3 TEMP[13].x, TEMP[7].xyzz, TEMP[12].xyzz >159: MOV_SAT TEMP[13].x, TEMP[13].xxxx >160: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[13].xxxx >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[14].xxxx >162: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx >163: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx, TEMP[10].zzzz >164: ABS TEMP[13].xyz, TEMP[7].xyzz >165: MUL TEMP[12].xyz, TEMP[13].xyzz, TEMP[7].xyzz >166: FSGE TEMP[12].xyz, TEMP[12].xyzz, IMM[0].xxxx >167: AND TEMP[12].xyz, TEMP[12].xyzz, IMM[4].xxxx >168: INEG TEMP[12].xyz, TEMP[12].xyzz >169: AND TEMP[13].xyz, TEMP[12].xyzz, IMM[5].xxxx >170: USNE TEMP[14].x, TEMP[12].xxxx, IMM[1].xxxx >171: UIF TEMP[14].xxxx :0 >172: MOV TEMP[14].x, IMM[1].xxxx >173: ELSE :0 >174: MOV TEMP[14].x, IMM[5].xxxx >175: ENDIF >176: MOV TEMP[14].x, TEMP[14].xxxx >177: USNE TEMP[15].x, TEMP[12].yyyy, IMM[1].xxxx >178: UIF TEMP[15].xxxx :0 >179: MOV TEMP[15].x, IMM[1].xxxx >180: ELSE :0 >181: MOV TEMP[15].x, IMM[5].xxxx >182: ENDIF >183: MOV TEMP[14].y, 
TEMP[15].xxxx >184: USNE TEMP[12].x, TEMP[12].zzzz, IMM[1].xxxx >185: UIF TEMP[12].xxxx :0 >186: MOV TEMP[12].x, IMM[1].xxxx >187: ELSE :0 >188: MOV TEMP[12].x, IMM[5].xxxx >189: ENDIF >190: MOV TEMP[14].z, TEMP[12].xxxx >191: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[14].xyzz >192: FMA TEMP[1].xyz, TEMP[5].xyzz, TEMP[13].xyzz, TEMP[4].xyzz >193: ABS TEMP[4].xyz, TEMP[7].xyzz >194: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[4].xyzz >195: ADD TEMP[5].x, TEMP[10].xxxx, IMM[6].zzzz >196: MOV_SAT TEMP[5].x, TEMP[5].xxxx >197: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[4].xxxx >198: MUL TEMP[1].x, TEMP[9].xxxx, TEMP[1].xxxx >199: MUL TEMP[1].x, TEMP[11].xxxx, TEMP[1].xxxx >200: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[1].xxxx >201: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xyzz >202: MUL TEMP[3].xyz, TEMP[10].yyyy, TEMP[3].xyzz >203: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[8].xyzz, TEMP[3].xyzz >204: ADD TEMP[0].xyz, TEMP[0].xyww, TEMP[1].xyzz >205: MUL TEMP[1].x, TEMP[8].wwww, TEMP[8].wwww >206: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx >207: MOV TEMP[0].w, IMM[0].zzzz >208: MOV OUT[0], TEMP[0] >209: END >radeonsi: Compiling shader 75 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > 
%51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 3 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 7 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 > %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 11 > %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 > %76 = extractelement <8 x i32> %72, i32 7 > %77 = extractelement <4 x i32> %75, i32 0 > %78 = and i32 %77, %76 > %79 = insertelement <4 x i32> %75, i32 %78, i32 0 > %80 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 > %82 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %83 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %82, i64 0, i64 15 > %84 = load <4 x i32>, <4 x i32> addrspace(2)* %83, align 16, !tbaa !0 > %85 = extractelement <8 x i32> %81, i32 7 > %86 = extractelement <4 x i32> %84, i32 0 > %87 = and i32 %86, %85 > %88 = insertelement <4 x i32> %84, i32 %87, i32 0 > %89 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %90 = load <8 x i32>, <8 x i32> addrspace(2)* %89, align 32, !tbaa !0 > %91 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %92 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %91, i64 0, i64 19 > %93 = load <4 x i32>, <4 x i32> addrspace(2)* %92, align 16, !tbaa !0 > %94 = extractelement <8 x i32> %90, i32 7 > %95 = extractelement <4 x i32> %93, i32 0 > %96 = and i32 %95, %94 > %97 = insertelement <4 x i32> %93, i32 %96, i32 0 > %98 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %99 = load <8 x i32>, <8 x i32> addrspace(2)* %98, align 32, !tbaa !0 > %100 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %101 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %100, i64 0, i64 23 > %102 = load <4 x i32>, <4 x i32> addrspace(2)* %101, align 16, !tbaa !0 > %103 = extractelement <8 x i32> %99, i32 7 > %104 = extractelement <4 x i32> %102, i32 0 > %105 = and i32 %104, %103 > %106 = insertelement <4 x i32> %102, i32 %105, i32 0 > %107 = 
getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %108 = load <8 x i32>, <8 x i32> addrspace(2)* %107, align 32, !tbaa !0 > %109 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %110 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %109, i64 0, i64 27 > %111 = load <4 x i32>, <4 x i32> addrspace(2)* %110, align 16, !tbaa !0 > %112 = extractelement <8 x i32> %108, i32 7 > %113 = extractelement <4 x i32> %111, i32 0 > %114 = and i32 %113, %112 > %115 = insertelement <4 x i32> %111, i32 %114, i32 0 > %116 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %117 = load <8 x i32>, <8 x i32> addrspace(2)* %116, align 32, !tbaa !0 > %118 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %119 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %118, i64 0, i64 31 > %120 = load <4 x i32>, <4 x i32> addrspace(2)* %119, align 16, !tbaa !0 > %121 = extractelement <8 x i32> %117, i32 7 > %122 = extractelement <4 x i32> %120, i32 0 > %123 = and i32 %122, %121 > %124 = insertelement <4 x i32> %120, i32 %123, i32 0 > %125 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16 > %126 = load <8 x i32>, <8 x i32> addrspace(2)* %125, align 32, !tbaa !0 > %127 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %128 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %127, i64 0, i64 35 > %129 = load <4 x i32>, <4 x i32> addrspace(2)* %128, align 16, !tbaa !0 > %130 = extractelement <8 x i32> %126, i32 7 > %131 = extractelement <4 x i32> %129, i32 0 > %132 = and i32 %131, %130 > %133 = insertelement <4 x i32> %129, i32 %132, i32 0 > %134 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %135 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %136 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %137 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %138 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %139 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %140 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %141 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %142 = fcmp oeq float %136, 0.000000e+00 > %143 = fcmp oeq float %136, 0.000000e+00 > %144 = fcmp ogt float %134, 0.000000e+00 > %145 = select i1 %144, float 1.000000e+00, float %134 > %146 = fcmp oge float %145, 0.000000e+00 > %147 = fcmp ogt float %135, 0.000000e+00 > %148 = select i1 %147, float 1.000000e+00, float %135 > %149 = fcmp oge float %148, 0.000000e+00 > %.op = fmul float %145, 0x4600000000000000 > %150 = select i1 %146, float %.op, float 0xC600000000000000 > %.op96 = fmul float %148, 0x4600000000000000 > %151 = select i1 %149, float %.op96, float 0xC600000000000000 > %152 = fdiv float 1.000000e+00, %136 > %153 = fmul float %134, %152 > %154 = fmul float %135, %152 > %155 = select i1 %142, float %150, float %153 > %156 = select i1 %143, float %151, float %154 > %157 = fcmp oeq float %136, 0.000000e+00 > %158 = fcmp oeq float %136, 0.000000e+00 > %159 = fcmp ogt float %137, 0.000000e+00 > %160 = select i1 %159, float 1.000000e+00, float %137 > %161 = fcmp oge float %160, 0.000000e+00 > %162 = fcmp ogt float %138, 0.000000e+00 > %163 = select i1 %162, float 1.000000e+00, float %138 > %164 = fcmp oge float %163, 0.000000e+00 > %.op97 = fmul float %160, 0x4600000000000000 
> %165 = select i1 %161, float %.op97, float 0xC600000000000000 > %.op98 = fmul float %163, 0x4600000000000000 > %166 = select i1 %164, float %.op98, float 0xC600000000000000 > %167 = fdiv float 1.000000e+00, %136 > %168 = fmul float %137, %167 > %169 = fmul float %138, %167 > %170 = select i1 %157, float %165, float %168 > %171 = select i1 %158, float %166, float %169 > %172 = bitcast float %155 to i32 > %173 = bitcast float %156 to i32 > %174 = insertelement <2 x i32> undef, i32 %172, i32 0 > %175 = insertelement <2 x i32> %174, i32 %173, i32 1 > %176 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %175, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %177 = extractelement <4 x float> %176, i32 0 > %178 = fmul float %170, %177 > %179 = fmul float %171, %177 > %180 = fmul float %25, %178 > %181 = fmul float %26, %179 > %182 = fadd float %180, %181 > %183 = fmul float %27, %177 > %184 = fadd float %182, %183 > %185 = fadd float %184, %28 > %186 = fmul float %29, %178 > %187 = fmul float %30, %179 > %188 = fadd float %186, %187 > %189 = fmul float %31, %177 > %190 = fadd float %188, %189 > %191 = fadd float %190, %32 > %192 = fmul float %33, %178 > %193 = fmul float %34, %179 > %194 = fadd float %192, %193 > %195 = fmul float %35, %177 > %196 = fadd float %194, %195 > %197 = fadd float %196, %36 > %198 = bitcast float %185 to i32 > %199 = bitcast float %191 to i32 > %200 = bitcast float %197 to i32 > %201 = insertelement <4 x i32> undef, i32 %198, i32 0 > %202 = insertelement <4 x i32> %201, i32 %199, i32 1 > %203 = insertelement <4 x i32> %202, i32 %200, i32 2 > %204 = insertelement <4 x i32> %203, i32 0, i32 3 > %205 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %204, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %206 = extractelement <4 x float> %205, i32 0 > %207 = extractelement <4 x float> %205, i32 1 > %208 = extractelement <4 x float> %205, i32 2 > %209 = extractelement <4 x float> %205, i32 3 > %210 = fmul float %209, %206 > %211 = fmul float %209, %207 > %212 = fmul float %209, %208 > %213 = fmul float %210, %46 > %214 = fmul float %211, %47 > %215 = fmul float %212, %48 > %216 = bitcast float %185 to i32 > %217 = bitcast float %191 to i32 > %218 = bitcast float %197 to i32 > %219 = insertelement <4 x i32> undef, i32 %216, i32 0 > %220 = insertelement <4 x i32> %219, i32 %217, i32 1 > %221 = insertelement <4 x i32> %220, i32 %218, i32 2 > %222 = insertelement <4 x i32> %221, i32 0, i32 3 > %223 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %222, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %224 = extractelement <4 x float> %223, i32 0 > %225 = extractelement <4 x float> %223, i32 1 > %226 = extractelement <4 x float> %223, i32 2 > %227 = bitcast float %185 to i32 > %228 = bitcast float %191 to i32 > %229 = bitcast float %197 to i32 > %230 = insertelement <4 x i32> undef, i32 %227, i32 0 > %231 = insertelement <4 x i32> %230, i32 %228, i32 1 > %232 = insertelement <4 x i32> %231, i32 %229, i32 2 > %233 = insertelement <4 x i32> %232, i32 0, i32 3 > %234 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %233, <8 x i32> %81, <4 x i32> %88, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %235 = extractelement <4 x float> %234, i32 0 > %236 = extractelement <4 x float> %234, i32 1 > %237 = extractelement <4 x float> %234, i32 2 > %238 = bitcast float %155 to i32 > %239 = bitcast float %156 to i32 > %240 = 
insertelement <2 x i32> undef, i32 %238, i32 0 > %241 = insertelement <2 x i32> %240, i32 %239, i32 1 > %242 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %241, <8 x i32> %90, <4 x i32> %97, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %243 = extractelement <4 x float> %242, i32 0 > %244 = extractelement <4 x float> %242, i32 1 > %245 = extractelement <4 x float> %242, i32 2 > %246 = call float @llvm.fma.f32(float %243, float 2.000000e+00, float -1.000000e+00) > %247 = call float @llvm.fma.f32(float %244, float 2.000000e+00, float -1.000000e+00) > %248 = call float @llvm.fma.f32(float %245, float 2.000000e+00, float -1.000000e+00) > %249 = fmul float %37, %246 > %250 = fmul float %38, %247 > %251 = fadd float %250, %249 > %252 = fmul float %39, %248 > %253 = fadd float %251, %252 > %254 = fmul float %40, %246 > %255 = fmul float %41, %247 > %256 = fadd float %255, %254 > %257 = fmul float %42, %248 > %258 = fadd float %256, %257 > %259 = fmul float %43, %246 > %260 = fmul float %44, %247 > %261 = fadd float %260, %259 > %262 = fmul float %45, %248 > %263 = fadd float %261, %262 > %264 = fmul float %253, %253 > %265 = fmul float %258, %258 > %266 = fadd float %265, %264 > %267 = fmul float %263, %263 > %268 = fadd float %266, %267 > %269 = call float @llvm.AMDGPU.rsq.clamped.f32(float %268) > %270 = fmul float %269, %253 > %271 = fmul float %269, %258 > %272 = fmul float %269, %263 > %273 = bitcast float %155 to i32 > %274 = bitcast float %156 to i32 > %275 = insertelement <4 x i32> undef, i32 %273, i32 0 > %276 = insertelement <4 x i32> %275, i32 %274, i32 1 > %277 = insertelement <4 x i32> %276, i32 0, i32 2 > %278 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %277, <8 x i32> %99, <4 x i32> %106, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %279 = extractelement <4 x float> %278, i32 0 > %280 = extractelement <4 x float> %278, i32 1 > %281 = call float @llvm.fabs.f32(float %270) > %282 = call float @llvm.fabs.f32(float %271) > %283 = call float @llvm.fabs.f32(float %272) > %284 = fmul float %281, %270 > %285 = fmul float %282, %271 > %286 = fmul float %283, %272 > %287 = fcmp ole float %284, 0.000000e+00 > %288 = fcmp ole float %285, 0.000000e+00 > %289 = fcmp ole float %286, 0.000000e+00 > %290 = select i1 %287, float 1.000000e+00, float 0.000000e+00 > %291 = select i1 %288, float 1.000000e+00, float 0.000000e+00 > %292 = select i1 %289, float 1.000000e+00, float 0.000000e+00 > %. = select i1 %287, float 0.000000e+00, float 1.000000e+00 > %temp56.0 = select i1 %288, float 0.000000e+00, float 1.000000e+00 > %.91 = select i1 %289, float 0.000000e+00, float 1.000000e+00 > %293 = fmul float %224, %. 
> %294 = fmul float %225, %temp56.0 > %295 = fmul float %226, %.91 > %296 = call float @llvm.fma.f32(float %235, float %290, float %293) > %297 = call float @llvm.fma.f32(float %236, float %291, float %294) > %298 = call float @llvm.fma.f32(float %237, float %292, float %295) > %299 = call float @llvm.fabs.f32(float %284) > %300 = call float @llvm.fabs.f32(float %285) > %301 = call float @llvm.fabs.f32(float %286) > %302 = fmul float %296, %299 > %303 = fmul float %297, %300 > %304 = fadd float %303, %302 > %305 = fmul float %298, %301 > %306 = fadd float %304, %305 > %307 = fmul float %280, %306 > %308 = fmul float %213, %307 > %309 = fmul float %214, %307 > %310 = fmul float %215, %307 > %311 = bitcast float %155 to i32 > %312 = bitcast float %156 to i32 > %313 = insertelement <2 x i32> undef, i32 %311, i32 0 > %314 = insertelement <2 x i32> %313, i32 %312, i32 1 > %315 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %314, <8 x i32> %108, <4 x i32> %115, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %316 = extractelement <4 x float> %315, i32 0 > %317 = extractelement <4 x float> %315, i32 1 > %318 = extractelement <4 x float> %315, i32 2 > %319 = extractelement <4 x float> %315, i32 3 > %320 = bitcast float %155 to i32 > %321 = bitcast float %156 to i32 > %322 = insertelement <2 x i32> undef, i32 %320, i32 0 > %323 = insertelement <2 x i32> %322, i32 %321, i32 1 > %324 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %323, <8 x i32> %117, <4 x i32> %124, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %325 = extractelement <4 x float> %324, i32 0 > %326 = extractelement <4 x float> %324, i32 1 > %327 = extractelement <4 x float> %324, i32 2 > %328 = extractelement <4 x float> %324, i32 3 > %329 = fcmp ogt float %328, 0.000000e+00 > br i1 %329, label %IF71, label %ENDIF70 > >IF71: ; preds = %main_body > %330 = extractelement <4 x float> %242, i32 3 > %331 = call float @llvm.minnum.f32(float %328, float 5.000000e-01) > %332 = fcmp olt float %330, 0x3FE0505060000000 > %333 = fmul float %308, %316 > %334 = fmul float %309, %317 > %335 = fmul float %310, %318 > %336 = fadd float %333, %333 > %337 = fadd float %334, %334 > %338 = fadd float %335, %335 > %339 = call float @llvm.maxnum.f32(float %318, float %317) > %340 = call float @llvm.maxnum.f32(float %339, float %316) > %341 = fcmp oeq float %340, 0.000000e+00 > %342 = fcmp oeq float %340, 0.000000e+00 > %343 = fcmp oeq float %340, 0.000000e+00 > %344 = fcmp ogt float %316, 0.000000e+00 > %345 = select i1 %344, float 1.000000e+00, float %316 > %346 = fcmp oge float %345, 0.000000e+00 > %347 = fcmp ogt float %317, 0.000000e+00 > %348 = select i1 %347, float 1.000000e+00, float %317 > %349 = fcmp oge float %348, 0.000000e+00 > %350 = fcmp ogt float %318, 0.000000e+00 > %351 = select i1 %350, float 1.000000e+00, float %318 > %352 = fcmp oge float %351, 0.000000e+00 > %.op99 = fmul float %345, 0x4600000000000000 > %353 = select i1 %346, float %.op99, float 0xC600000000000000 > %.op100 = fmul float %348, 0x4600000000000000 > %354 = select i1 %349, float %.op100, float 0xC600000000000000 > %.op101 = fmul float %351, 0x4600000000000000 > %355 = select i1 %352, float %.op101, float 0xC600000000000000 > %356 = fdiv float 1.000000e+00, %340 > %357 = fmul float %316, %356 > %358 = fmul float %317, %356 > %359 = fmul float %318, %356 > %360 = select i1 %341, float %353, float %357 > %361 = select i1 %342, float %354, float %358 > %362 = select i1 %343, float %355, float %359 > %363 = call float 
@llvm.AMDGPU.clamp.(float %360, float 0.000000e+00, float 1.000000e+00) > %364 = call float @llvm.AMDGPU.clamp.(float %361, float 0.000000e+00, float 1.000000e+00) > %365 = call float @llvm.AMDGPU.clamp.(float %362, float 0.000000e+00, float 1.000000e+00) > %366 = call float @llvm.AMDGPU.clamp.(float %308, float 0.000000e+00, float 1.000000e+00) > %367 = call float @llvm.AMDGPU.clamp.(float %309, float 0.000000e+00, float 1.000000e+00) > %368 = call float @llvm.AMDGPU.clamp.(float %310, float 0.000000e+00, float 1.000000e+00) > %369 = fmul float %363, %363 > %370 = fmul float %364, %364 > %371 = fmul float %365, %365 > %372 = fmul float %369, %366 > %373 = fmul float %370, %367 > %374 = fmul float %371, %368 > %375 = fmul float %372, 0x3FC3333340000000 > %376 = fmul float %373, 0x3FC3333340000000 > %377 = fmul float %374, 0x3FC3333340000000 > %.92 = select i1 %332, float %336, float %375 > %temp60.0 = select i1 %332, float %337, float %376 > %.94 = select i1 %332, float %338, float %377 > %378 = fmul float %331, %.92 > %379 = fmul float %331, %temp60.0 > %380 = fmul float %331, %.94 > br label %ENDIF70 > >ENDIF70: ; preds = %main_body, %IF71 > %temp3.0 = phi float [ %380, %IF71 ], [ 0.000000e+00, %main_body ] > %temp1.0 = phi float [ %379, %IF71 ], [ 0.000000e+00, %main_body ] > %temp.0 = phi float [ %378, %IF71 ], [ 0.000000e+00, %main_body ] > %381 = fmul float %210, %49 > %382 = fmul float %211, %50 > %383 = fmul float %212, %51 > %384 = fmul float %139, %139 > %385 = fmul float %140, %140 > %386 = fadd float %385, %384 > %387 = fmul float %141, %141 > %388 = fadd float %386, %387 > %389 = call float @llvm.AMDGPU.rsq.clamped.f32(float %388) > %390 = fmul float %389, %139 > %391 = fmul float %389, %140 > %392 = fmul float %389, %141 > %393 = fmul float %390, %270 > %394 = fmul float %391, %271 > %395 = fadd float %394, %393 > %396 = fmul float %392, %272 > %397 = fadd float %395, %396 > %398 = fmul float %270, %397 > %399 = fmul float %271, %397 > %400 = fmul float %272, %397 > %401 = fsub float -0.000000e+00, %398 > %402 = call float @llvm.fma.f32(float %401, float 2.000000e+00, float %390) > %403 = fsub float -0.000000e+00, %399 > %404 = call float @llvm.fma.f32(float %403, float 2.000000e+00, float %391) > %405 = fsub float -0.000000e+00, %400 > %406 = call float @llvm.fma.f32(float %405, float 2.000000e+00, float %392) > %407 = fsub float -0.000000e+00, %402 > %408 = fsub float -0.000000e+00, %404 > %409 = fsub float -0.000000e+00, %406 > %410 = call float @llvm.fabs.f32(float %325) > %411 = call float @llvm.sqrt.f32(float %410) > %412 = fsub float -0.000000e+00, %411 > %413 = call float @llvm.fma.f32(float %412, float 8.000000e+00, float %52) > %414 = insertelement <4 x float> undef, float %407, i32 0 > %415 = insertelement <4 x float> %414, float %408, i32 1 > %416 = insertelement <4 x float> %415, float %409, i32 2 > %417 = insertelement <4 x float> %416, float %413, i32 3 > %418 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %417) > %419 = extractelement <4 x float> %418, i32 0 > %420 = extractelement <4 x float> %418, i32 1 > %421 = extractelement <4 x float> %418, i32 2 > %422 = call float @llvm.fabs.f32(float %421) > %423 = fdiv float 1.000000e+00, %422 > %424 = fmul float %419, %423 > %425 = fadd float %424, 1.500000e+00 > %426 = fmul float %420, %423 > %427 = fadd float %426, 1.500000e+00 > %428 = bitcast float %427 to i32 > %429 = bitcast float %425 to i32 > %bc = bitcast <4 x float> %418 to <4 x i32> > %430 = extractelement <4 x i32> %bc, i32 3 > %431 = bitcast float 
%413 to i32 > %432 = insertelement <4 x i32> undef, i32 %428, i32 0 > %433 = insertelement <4 x i32> %432, i32 %429, i32 1 > %434 = insertelement <4 x i32> %433, i32 %430, i32 2 > %435 = insertelement <4 x i32> %434, i32 %431, i32 3 > %436 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %435, <8 x i32> %126, <4 x i32> %133, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %437 = extractelement <4 x float> %436, i32 0 > %438 = extractelement <4 x float> %436, i32 1 > %439 = extractelement <4 x float> %436, i32 2 > %440 = extractelement <4 x float> %436, i32 3 > %441 = fmul float %440, %437 > %442 = fmul float %440, %438 > %443 = fmul float %440, %439 > %444 = fsub float -0.000000e+00, %437 > %445 = call float @llvm.fma.f32(float %441, float 1.000000e+01, float %444) > %446 = fsub float -0.000000e+00, %438 > %447 = call float @llvm.fma.f32(float %442, float 1.000000e+01, float %446) > %448 = fsub float -0.000000e+00, %439 > %449 = call float @llvm.fma.f32(float %443, float 1.000000e+01, float %448) > %450 = call float @llvm.fma.f32(float %325, float %445, float %437) > %451 = call float @llvm.fma.f32(float %325, float %447, float %438) > %452 = call float @llvm.fma.f32(float %325, float %449, float %439) > %453 = fsub float 1.000000e+00, %327 > %454 = fmul float %402, %390 > %455 = fmul float %404, %391 > %456 = fadd float %455, %454 > %457 = fmul float %406, %392 > %458 = fadd float %456, %457 > %459 = call float @llvm.AMDGPU.clamp.(float %458, float 0.000000e+00, float 1.000000e+00) > %460 = fmul float %459, %459 > %461 = fmul float %460, %460 > %462 = fmul float %459, %461 > %463 = call float @llvm.fma.f32(float %453, float %462, float %327) > %464 = call float @llvm.fabs.f32(float %402) > %465 = call float @llvm.fabs.f32(float %404) > %466 = call float @llvm.fabs.f32(float %406) > %467 = fmul float %464, %402 > %468 = fmul float %465, %404 > %469 = fmul float %466, %406 > %470 = fcmp oge float %467, 0.000000e+00 > %471 = fcmp oge float %468, 0.000000e+00 > %472 = fcmp oge float %469, 0.000000e+00 > %473 = select i1 %470, float 1.000000e+00, float 0.000000e+00 > %474 = select i1 %471, float 1.000000e+00, float 0.000000e+00 > %475 = select i1 %472, float 1.000000e+00, float 0.000000e+00 > %.93 = select i1 %470, float 0.000000e+00, float 1.000000e+00 > %temp60.2 = select i1 %471, float 0.000000e+00, float 1.000000e+00 > %.95 = select i1 %472, float 0.000000e+00, float 1.000000e+00 > %476 = fmul float %224, %.93 > %477 = fmul float %225, %temp60.2 > %478 = fmul float %226, %.95 > %479 = call float @llvm.fma.f32(float %235, float %473, float %476) > %480 = call float @llvm.fma.f32(float %236, float %474, float %477) > %481 = call float @llvm.fma.f32(float %237, float %475, float %478) > %482 = call float @llvm.fabs.f32(float %402) > %483 = call float @llvm.fabs.f32(float %404) > %484 = call float @llvm.fabs.f32(float %406) > %485 = fmul float %479, %482 > %486 = fmul float %480, %483 > %487 = fadd float %486, %485 > %488 = fmul float %481, %484 > %489 = fadd float %487, %488 > %490 = fadd float %325, 0x3FA99999A0000000 > %491 = call float @llvm.AMDGPU.clamp.(float %490, float 0.000000e+00, float 1.000000e+00) > %492 = fmul float %491, %489 > %493 = fmul float %279, %492 > %494 = fmul float %463, %493 > %495 = fmul float %381, %494 > %496 = fmul float %382, %494 > %497 = fmul float %383, %494 > %498 = fmul float %495, %450 > %499 = fmul float %496, %451 > %500 = fmul float %497, %452 > %501 = fmul float %326, %498 > %502 = fmul float %326, %499 > %503 = fmul float %326, 
%500 > %504 = call float @llvm.fma.f32(float %308, float %316, float %501) > %505 = call float @llvm.fma.f32(float %309, float %317, float %502) > %506 = call float @llvm.fma.f32(float %310, float %318, float %503) > %507 = fadd float %temp.0, %504 > %508 = fadd float %temp1.0, %505 > %509 = fadd float %temp3.0, %506 > %510 = fmul float %319, %319 > %511 = fmul float %507, %510 > %512 = fmul float %508, %510 > %513 = fmul float %509, %510 > %514 = bitcast float %5 to i32 > %515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %514, 10 > %516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %515, float %511, 11 > %517 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %516, float %512, 12 > %518 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %517, float %513, 13 > %519 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %518, float 1.000000e+00, 14 > %520 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %519, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %520 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], 
PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SAMP[8] >DCL SAMP[9] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 3D, FLOAT >DCL SVIEW[3], 3D, FLOAT >DCL SVIEW[4], 3D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL SVIEW[8], 2D, FLOAT >DCL SVIEW[9], CUBE, FLOAT >DCL CONST[1][0..30] >DCL TEMP[0..17], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 256, 64, 80} >IMM[2] UINT32 {96, 192, 208, 224} >IMM[3] FLT32 { -1.0000, 0.5000, 0.5098, 0.1500} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] UINT32 {1065353216, 288, 480, 0} >IMM[6] FLT32 { 8.0000, 10.0000, 0.0500, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: ADD TEMP[3].x, -CONST[1][16].wwww, IMM[0].zzzz > 15: FMA TEMP[2].x, CONST[1][16].wwww, TEMP[2].xxxx, TEMP[3].xxxx > 16: MOV TEMP[3].xy, TEMP[0].xyyy > 17: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D > 18: MOV TEMP[4].z, TEMP[3].xxxx > 19: MUL TEMP[4].xy, TEMP[1].xyyy, TEMP[3].xxxx > 20: MOV TEMP[4].w, IMM[0].zzzz > 21: DP4 TEMP[1].x, CONST[1][4], TEMP[4] > 22: DP4 TEMP[3].x, CONST[1][5], TEMP[4] > 23: MOV TEMP[1].y, TEMP[3].xxxx > 24: DP4 TEMP[3].x, CONST[1][6], TEMP[4] > 25: MOV TEMP[1].z, TEMP[3].xxxx > 26: MOV TEMP[3].xyz, TEMP[1].xyzz > 27: MOV TEMP[3].w, IMM[0].xxxx > 28: TXL TEMP[3], TEMP[3], SAMP[2], 3D > 29: MUL TEMP[3].xyz, TEMP[3].wwww, TEMP[3].xyzz > 30: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[1][16].xyzz > 31: MOV TEMP[5].xyz, TEMP[1].xyzz > 32: MOV TEMP[5].w, IMM[0].xxxx > 33: TXL TEMP[5].xyz, TEMP[5], SAMP[3], 3D > 34: MOV TEMP[6].xyz, TEMP[1].xyzz > 35: MOV TEMP[6].w, IMM[0].xxxx > 36: TXL TEMP[6].xyz, TEMP[6], SAMP[4], 3D > 37: MOV TEMP[7].xy, TEMP[0].xyyy > 38: TEX TEMP[7], TEMP[7], SAMP[5], 2D > 39: FMA TEMP[8].xyz, TEMP[7].xyzz, IMM[0].wwww, IMM[3].xxxx > 40: DP3 TEMP[9].x, CONST[1][12].xyzz, TEMP[8].xyzz > 41: DP3 TEMP[10].x, CONST[1][13].xyzz, TEMP[8].xyzz > 42: MOV TEMP[9].y, TEMP[10].xxxx > 43: DP3 TEMP[10].x, CONST[1][14].xyzz, TEMP[8].xyzz > 44: MOV TEMP[9].z, TEMP[10].xxxx > 45: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz > 46: RSQ TEMP[10].x, TEMP[10].xxxx > 47: MUL TEMP[8].xyz, TEMP[10].xxxx, TEMP[9].xyzz > 48: MOV TEMP[10].xy, TEMP[0].xyyy > 49: MOV TEMP[10].w, IMM[0].xxxx > 50: TXL TEMP[10].xy, TEMP[10], SAMP[6], 2D > 51: ABS TEMP[11].xyz, TEMP[8].xyzz > 52: MUL TEMP[9].xyz, TEMP[11].xyzz, TEMP[8].xyzz > 53: FSGE TEMP[11].xyz, IMM[0].xxxx, TEMP[9].xyzz > 54: AND TEMP[11].xyz, TEMP[11].xyzz, IMM[4].xxxx > 55: INEG TEMP[11].xyz, TEMP[11].xyzz > 56: AND TEMP[12].xyz, TEMP[11].xyzz, IMM[5].xxxx > 57: MOV TEMP[13].xyz, TEMP[12].xyzx > 58: USNE TEMP[14].x, TEMP[11].xxxx, IMM[1].xxxx > 59: UIF TEMP[14].xxxx :0 > 60: MOV TEMP[14].x, IMM[1].xxxx > 61: ELSE :0 > 62: MOV TEMP[14].x, IMM[5].xxxx > 63: ENDIF > 64: MOV TEMP[14].x, TEMP[14].xxxx > 65: USNE 
TEMP[15].x, TEMP[11].yyyy, IMM[1].xxxx > 66: UIF TEMP[15].xxxx :0 > 67: MOV TEMP[15].x, IMM[1].xxxx > 68: ELSE :0 > 69: MOV TEMP[15].x, IMM[5].xxxx > 70: ENDIF > 71: MOV TEMP[14].y, TEMP[15].xxxx > 72: USNE TEMP[15].x, TEMP[11].zzzz, IMM[1].xxxx > 73: UIF TEMP[15].xxxx :0 > 74: MOV TEMP[15].x, IMM[1].xxxx > 75: ELSE :0 > 76: MOV TEMP[15].x, IMM[5].xxxx > 77: ENDIF > 78: MOV TEMP[14].z, TEMP[15].xxxx > 79: MUL TEMP[11].xyz, TEMP[5].xyzz, TEMP[14].xyzz > 80: FMA TEMP[11].xyz, TEMP[6].xyzz, TEMP[12].xyzz, TEMP[11].xyzz > 81: ABS TEMP[9].xyz, TEMP[9].xyzz > 82: DP3 TEMP[9].x, TEMP[11].xyzz, TEMP[9].xyzz > 83: MUL TEMP[9].x, TEMP[10].yyyy, TEMP[9].xxxx > 84: MOV TEMP[0].w, TEMP[9].xxxx > 85: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx > 86: MOV TEMP[9].xy, TEMP[0].xyyy > 87: TEX TEMP[9], TEMP[9], SAMP[7], 2D > 88: MOV TEMP[11].xy, TEMP[0].xyyy > 89: TEX TEMP[11], TEMP[11], SAMP[8], 2D > 90: FSLT TEMP[12].x, IMM[0].xxxx, TEMP[11].wwww > 91: AND TEMP[12].x, TEMP[12].xxxx, IMM[4].xxxx > 92: INEG TEMP[12].x, TEMP[12].xxxx > 93: MOV TEMP[0].x, TEMP[12].xxxx > 94: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx > 95: UIF TEMP[12].xxxx :0 > 96: MIN TEMP[12].x, TEMP[11].wwww, IMM[3].yyyy > 97: MOV TEMP[0].x, TEMP[12].xxxx > 98: FSLT TEMP[7].x, TEMP[7].wwww, IMM[3].zzzz > 99: AND TEMP[7].x, TEMP[7].xxxx, IMM[4].xxxx >100: INEG TEMP[7].x, TEMP[7].xxxx >101: MUL TEMP[13].xyz, TEMP[4].xyzz, TEMP[9].xyzz >102: ADD TEMP[13].xyz, TEMP[13].xyzz, TEMP[13].xyzz >103: MAX TEMP[14].x, TEMP[9].zzzz, TEMP[9].yyyy >104: MOV TEMP[0].w, TEMP[14].xxxx >105: MAX TEMP[14].x, TEMP[14].xxxx, TEMP[9].xxxx >106: MOV TEMP[0].w, TEMP[14].xxxx >107: FSEQ TEMP[15].xyz, TEMP[14].xxxx, IMM[0].xxxx >108: SSG TEMP[16].xyz, TEMP[9].xyzz >109: MUL TEMP[16].xyz, IMM[0].yyyy, TEMP[16].xyzz >110: RCP TEMP[14].xyz, TEMP[14].xxxx >111: MUL TEMP[14].xyz, TEMP[9].xyzz, TEMP[14].xyzz >112: UCMP TEMP[14].xyz, TEMP[15].xyzz, TEMP[16].xyzz, TEMP[14].xyzz >113: MOV_SAT TEMP[14].xyz, TEMP[14].xyzz >114: MOV_SAT TEMP[15].xyz, TEMP[4].xyzz >115: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[14].xyzz >116: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >117: MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[3].wwww >118: USNE TEMP[15].x, TEMP[7].xxxx, IMM[1].xxxx >119: UIF TEMP[15].xxxx :0 >120: MOV TEMP[15].x, TEMP[13].xxxx >121: ELSE :0 >122: MOV TEMP[15].x, TEMP[14].xxxx >123: ENDIF >124: MOV TEMP[15].x, TEMP[15].xxxx >125: USNE TEMP[16].x, TEMP[7].xxxx, IMM[1].xxxx >126: UIF TEMP[16].xxxx :0 >127: MOV TEMP[16].x, TEMP[13].yyyy >128: ELSE :0 >129: MOV TEMP[16].x, TEMP[14].yyyy >130: ENDIF >131: MOV TEMP[15].y, TEMP[16].xxxx >132: USNE TEMP[16].x, TEMP[7].xxxx, IMM[1].xxxx >133: UIF TEMP[16].xxxx :0 >134: MOV TEMP[16].x, TEMP[13].zzzz >135: ELSE :0 >136: MOV TEMP[16].x, TEMP[14].zzzz >137: ENDIF >138: MOV TEMP[15].z, TEMP[16].xxxx >139: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[15].xyzz >140: MOV TEMP[0].xyw, TEMP[12].xyxz >141: ELSE :0 >142: MOV TEMP[0].xyw, IMM[0].xxxx >143: ENDIF >144: MUL TEMP[12].xyz, TEMP[3].xyzz, CONST[1][18].xyzz >145: DP3 TEMP[15].x, IN[2].xyzz, IN[2].xyzz >146: RSQ TEMP[15].x, TEMP[15].xxxx >147: MUL TEMP[13].xyz, TEMP[15].xxxx, IN[2].xyzz >148: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[8].xyzz >149: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[15].xxxx >150: FMA TEMP[8].xyz, -TEMP[8].xyzz, IMM[0].wwww, TEMP[13].xyzz >151: MOV TEMP[14].xyz, -TEMP[8].xyzx >152: ABS TEMP[15].x, TEMP[11].xxxx >153: SQRT TEMP[15].x, TEMP[15].xxxx >154: FMA TEMP[15].x, -TEMP[15].xxxx, IMM[6].xxxx, CONST[1][30].xxxx >155: MOV TEMP[14].xyz, TEMP[14].xyzz >156: MOV TEMP[14].w, TEMP[15].xxxx 
>157: TXL TEMP[14], TEMP[14], SAMP[9], CUBE >158: MUL TEMP[7].xyz, TEMP[14].wwww, TEMP[14].xyzz >159: FMA TEMP[7].xyz, TEMP[7].xyzz, IMM[6].yyyy, -TEMP[14].xyzz >160: FMA TEMP[7].xyz, TEMP[11].xxxx, TEMP[7].xyzz, TEMP[14].xyzz >161: ADD TEMP[14].x, -TEMP[11].zzzz, IMM[0].zzzz >162: DP3 TEMP[15].x, TEMP[8].xyzz, TEMP[13].xyzz >163: MOV_SAT TEMP[15].x, TEMP[15].xxxx >164: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx >165: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[16].xxxx >166: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[16].xxxx >167: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx, TEMP[11].zzzz >168: ABS TEMP[15].xyz, TEMP[8].xyzz >169: MUL TEMP[13].xyz, TEMP[15].xyzz, TEMP[8].xyzz >170: FSGE TEMP[13].xyz, TEMP[13].xyzz, IMM[0].xxxx >171: AND TEMP[13].xyz, TEMP[13].xyzz, IMM[4].xxxx >172: INEG TEMP[13].xyz, TEMP[13].xyzz >173: AND TEMP[15].xyz, TEMP[13].xyzz, IMM[5].xxxx >174: USNE TEMP[16].x, TEMP[13].xxxx, IMM[1].xxxx >175: UIF TEMP[16].xxxx :0 >176: MOV TEMP[16].x, IMM[1].xxxx >177: ELSE :0 >178: MOV TEMP[16].x, IMM[5].xxxx >179: ENDIF >180: MOV TEMP[16].x, TEMP[16].xxxx >181: USNE TEMP[17].x, TEMP[13].yyyy, IMM[1].xxxx >182: UIF TEMP[17].xxxx :0 >183: MOV TEMP[17].x, IMM[1].xxxx >184: ELSE :0 >185: MOV TEMP[17].x, IMM[5].xxxx >186: ENDIF >187: MOV TEMP[16].y, TEMP[17].xxxx >188: USNE TEMP[13].x, TEMP[13].zzzz, IMM[1].xxxx >189: UIF TEMP[13].xxxx :0 >190: MOV TEMP[13].x, IMM[1].xxxx >191: ELSE :0 >192: MOV TEMP[13].x, IMM[5].xxxx >193: ENDIF >194: MOV TEMP[16].z, TEMP[13].xxxx >195: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[16].xyzz >196: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[15].xyzz, TEMP[3].xyzz >197: ABS TEMP[3].xyz, TEMP[8].xyzz >198: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[3].xyzz >199: ADD TEMP[5].x, TEMP[11].xxxx, IMM[6].zzzz >200: MOV_SAT TEMP[5].x, TEMP[5].xxxx >201: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[3].xxxx >202: MUL TEMP[1].x, TEMP[10].xxxx, TEMP[1].xxxx >203: MUL TEMP[1].x, TEMP[14].xxxx, TEMP[1].xxxx >204: MUL TEMP[1].xyz, TEMP[12].xyzz, TEMP[1].xxxx >205: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[7].xyzz >206: MUL TEMP[1].xyz, TEMP[11].yyyy, TEMP[1].xyzz >207: FMA TEMP[1].xyz, TEMP[4].xyzz, TEMP[9].xyzz, TEMP[1].xyzz >208: ADD TEMP[0].xyz, TEMP[0].xyww, TEMP[1].xyzz >209: MUL TEMP[1].x, TEMP[9].wwww, TEMP[9].wwww >210: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx >211: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx >212: MOV TEMP[0].w, IMM[0].zzzz >213: MOV OUT[0], TEMP[0] >214: END >radeonsi: Compiling shader 76 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > 
%30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %54 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0 > %56 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %57 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %56, i64 0, i64 3 > %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 > %59 = extractelement <8 x i32> %55, i32 7 > %60 = extractelement <4 x i32> %58, i32 0 > %61 = and i32 %60, %59 > %62 = insertelement <4 x i32> %58, i32 %61, i32 0 > %63 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 > %65 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %66 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %65, i64 0, i64 7 > %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 > %68 = extractelement <8 x i32> %64, i32 7 > %69 = extractelement <4 x i32> %67, i32 0 > %70 = and i32 %69, %68 > %71 = insertelement <4 x i32> %67, i32 %70, i32 0 > %72 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %73 = load <8 x i32>, <8 x i32> addrspace(2)* %72, align 32, !tbaa !0 > %74 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %75 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %74, i64 0, i64 11 > %76 = load <4 x i32>, <4 x i32> addrspace(2)* %75, align 16, !tbaa !0 > %77 = extractelement <8 x i32> %73, i32 7 > %78 = extractelement <4 x i32> %76, i32 0 > %79 = and i32 %78, %77 > %80 = insertelement <4 x i32> %76, i32 %79, i32 0 > %81 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %82 = load <8 x i32>, <8 x i32> addrspace(2)* %81, align 32, !tbaa !0 > %83 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %84 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %83, i64 0, i64 15 > %85 = load <4 
x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0 > %86 = extractelement <8 x i32> %82, i32 7 > %87 = extractelement <4 x i32> %85, i32 0 > %88 = and i32 %87, %86 > %89 = insertelement <4 x i32> %85, i32 %88, i32 0 > %90 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %91 = load <8 x i32>, <8 x i32> addrspace(2)* %90, align 32, !tbaa !0 > %92 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %93 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %92, i64 0, i64 19 > %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0 > %95 = extractelement <8 x i32> %91, i32 7 > %96 = extractelement <4 x i32> %94, i32 0 > %97 = and i32 %96, %95 > %98 = insertelement <4 x i32> %94, i32 %97, i32 0 > %99 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %100 = load <8 x i32>, <8 x i32> addrspace(2)* %99, align 32, !tbaa !0 > %101 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %102 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %101, i64 0, i64 23 > %103 = load <4 x i32>, <4 x i32> addrspace(2)* %102, align 16, !tbaa !0 > %104 = extractelement <8 x i32> %100, i32 7 > %105 = extractelement <4 x i32> %103, i32 0 > %106 = and i32 %105, %104 > %107 = insertelement <4 x i32> %103, i32 %106, i32 0 > %108 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %109 = load <8 x i32>, <8 x i32> addrspace(2)* %108, align 32, !tbaa !0 > %110 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %111 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %110, i64 0, i64 27 > %112 = load <4 x i32>, <4 x i32> addrspace(2)* %111, align 16, !tbaa !0 > %113 = extractelement <8 x i32> %109, i32 7 > %114 = extractelement <4 x i32> %112, i32 0 > %115 = and i32 %114, %113 > %116 = insertelement <4 x i32> %112, i32 %115, i32 0 > %117 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %118 = load <8 x i32>, <8 x i32> addrspace(2)* %117, align 32, !tbaa !0 > %119 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %120 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %119, i64 0, i64 31 > %121 = load <4 x i32>, <4 x i32> addrspace(2)* %120, align 16, !tbaa !0 > %122 = extractelement <8 x i32> %118, i32 7 > %123 = extractelement <4 x i32> %121, i32 0 > %124 = and i32 %123, %122 > %125 = insertelement <4 x i32> %121, i32 %124, i32 0 > %126 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16 > %127 = load <8 x i32>, <8 x i32> addrspace(2)* %126, align 32, !tbaa !0 > %128 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %129 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %128, i64 0, i64 35 > %130 = load <4 x i32>, <4 x i32> addrspace(2)* %129, align 16, !tbaa !0 > %131 = extractelement <8 x i32> %127, i32 7 > %132 = extractelement <4 x i32> %130, i32 0 > %133 = and i32 %132, %131 > %134 = insertelement <4 x i32> %130, i32 %133, i32 0 > %135 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18 > %136 = load <8 x i32>, <8 x i32> addrspace(2)* %135, align 32, !tbaa !0 > %137 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %138 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %137, i64 0, i64 39 > %139 = load <4 x i32>, <4 x i32> addrspace(2)* %138, align 16, !tbaa !0 > %140 
= extractelement <8 x i32> %136, i32 7 > %141 = extractelement <4 x i32> %139, i32 0 > %142 = and i32 %141, %140 > %143 = insertelement <4 x i32> %139, i32 %142, i32 0 > %144 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %145 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %146 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %147 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %148 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %149 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %150 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %151 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %152 = fcmp oeq float %146, 0.000000e+00 > %153 = fcmp oeq float %146, 0.000000e+00 > %154 = fcmp ogt float %144, 0.000000e+00 > %155 = select i1 %154, float 1.000000e+00, float %144 > %156 = fcmp oge float %155, 0.000000e+00 > %157 = fcmp ogt float %145, 0.000000e+00 > %158 = select i1 %157, float 1.000000e+00, float %145 > %159 = fcmp oge float %158, 0.000000e+00 > %.op = fmul float %155, 0x4600000000000000 > %160 = select i1 %156, float %.op, float 0xC600000000000000 > %.op104 = fmul float %158, 0x4600000000000000 > %161 = select i1 %159, float %.op104, float 0xC600000000000000 > %162 = fdiv float 1.000000e+00, %146 > %163 = fmul float %144, %162 > %164 = fmul float %145, %162 > %165 = select i1 %152, float %160, float %163 > %166 = select i1 %153, float %161, float %164 > %167 = fcmp oeq float %146, 0.000000e+00 > %168 = fcmp oeq float %146, 0.000000e+00 > %169 = fcmp ogt float %147, 0.000000e+00 > %170 = select i1 %169, float 1.000000e+00, float %147 > %171 = fcmp oge float %170, 0.000000e+00 > %172 = fcmp ogt float %148, 0.000000e+00 > %173 = select i1 %172, float 1.000000e+00, float %148 > %174 = fcmp oge float %173, 0.000000e+00 > %.op105 = fmul float %170, 0x4600000000000000 > %175 = select i1 %171, float %.op105, float 0xC600000000000000 > %.op106 = fmul float %173, 0x4600000000000000 > %176 = select i1 %174, float %.op106, float 0xC600000000000000 > %177 = fdiv float 1.000000e+00, %146 > %178 = fmul float %147, %177 > %179 = fmul float %148, %177 > %180 = select i1 %167, float %175, float %178 > %181 = select i1 %168, float %176, float %179 > %182 = bitcast float %165 to i32 > %183 = bitcast float %166 to i32 > %184 = insertelement <2 x i32> undef, i32 %182, i32 0 > %185 = insertelement <2 x i32> %184, i32 %183, i32 1 > %186 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %185, <8 x i32> %55, <4 x i32> %62, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %187 = extractelement <4 x float> %186, i32 0 > %188 = fsub float 1.000000e+00, %49 > %189 = call float @llvm.fma.f32(float %49, float %187, float %188) > %190 = bitcast float %165 to i32 > %191 = bitcast float %166 to i32 > %192 = insertelement <2 x i32> undef, i32 %190, i32 0 > %193 = insertelement <2 x i32> %192, i32 %191, i32 1 > %194 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %193, <8 x i32> %64, <4 x i32> %71, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %195 = extractelement <4 x float> %194, i32 0 > %196 = fmul float %180, %195 > %197 = fmul float %181, %195 > %198 = fmul float %25, %196 > %199 = fmul float %26, %197 > %200 = fadd float %198, %199 > %201 = fmul float %27, %195 > %202 = fadd float %200, %201 > %203 = fadd float %202, %28 > %204 = fmul float %29, %196 > %205 = fmul float %30, %197 > %206 = fadd float 
%204, %205 > %207 = fmul float %31, %195 > %208 = fadd float %206, %207 > %209 = fadd float %208, %32 > %210 = fmul float %33, %196 > %211 = fmul float %34, %197 > %212 = fadd float %210, %211 > %213 = fmul float %35, %195 > %214 = fadd float %212, %213 > %215 = fadd float %214, %36 > %216 = bitcast float %203 to i32 > %217 = bitcast float %209 to i32 > %218 = bitcast float %215 to i32 > %219 = insertelement <4 x i32> undef, i32 %216, i32 0 > %220 = insertelement <4 x i32> %219, i32 %217, i32 1 > %221 = insertelement <4 x i32> %220, i32 %218, i32 2 > %222 = insertelement <4 x i32> %221, i32 0, i32 3 > %223 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %222, <8 x i32> %73, <4 x i32> %80, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %224 = extractelement <4 x float> %223, i32 0 > %225 = extractelement <4 x float> %223, i32 1 > %226 = extractelement <4 x float> %223, i32 2 > %227 = extractelement <4 x float> %223, i32 3 > %228 = fmul float %227, %224 > %229 = fmul float %227, %225 > %230 = fmul float %227, %226 > %231 = fmul float %228, %46 > %232 = fmul float %229, %47 > %233 = fmul float %230, %48 > %234 = bitcast float %203 to i32 > %235 = bitcast float %209 to i32 > %236 = bitcast float %215 to i32 > %237 = insertelement <4 x i32> undef, i32 %234, i32 0 > %238 = insertelement <4 x i32> %237, i32 %235, i32 1 > %239 = insertelement <4 x i32> %238, i32 %236, i32 2 > %240 = insertelement <4 x i32> %239, i32 0, i32 3 > %241 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %240, <8 x i32> %82, <4 x i32> %89, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %242 = extractelement <4 x float> %241, i32 0 > %243 = extractelement <4 x float> %241, i32 1 > %244 = extractelement <4 x float> %241, i32 2 > %245 = bitcast float %203 to i32 > %246 = bitcast float %209 to i32 > %247 = bitcast float %215 to i32 > %248 = insertelement <4 x i32> undef, i32 %245, i32 0 > %249 = insertelement <4 x i32> %248, i32 %246, i32 1 > %250 = insertelement <4 x i32> %249, i32 %247, i32 2 > %251 = insertelement <4 x i32> %250, i32 0, i32 3 > %252 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %251, <8 x i32> %91, <4 x i32> %98, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %253 = extractelement <4 x float> %252, i32 0 > %254 = extractelement <4 x float> %252, i32 1 > %255 = extractelement <4 x float> %252, i32 2 > %256 = bitcast float %165 to i32 > %257 = bitcast float %166 to i32 > %258 = insertelement <2 x i32> undef, i32 %256, i32 0 > %259 = insertelement <2 x i32> %258, i32 %257, i32 1 > %260 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %259, <8 x i32> %100, <4 x i32> %107, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %261 = extractelement <4 x float> %260, i32 0 > %262 = extractelement <4 x float> %260, i32 1 > %263 = extractelement <4 x float> %260, i32 2 > %264 = call float @llvm.fma.f32(float %261, float 2.000000e+00, float -1.000000e+00) > %265 = call float @llvm.fma.f32(float %262, float 2.000000e+00, float -1.000000e+00) > %266 = call float @llvm.fma.f32(float %263, float 2.000000e+00, float -1.000000e+00) > %267 = fmul float %37, %264 > %268 = fmul float %38, %265 > %269 = fadd float %268, %267 > %270 = fmul float %39, %266 > %271 = fadd float %269, %270 > %272 = fmul float %40, %264 > %273 = fmul float %41, %265 > %274 = fadd float %273, %272 > %275 = fmul float %42, %266 > %276 = fadd float %274, %275 > %277 = fmul float %43, %264 > %278 = fmul float %44, %265 > %279 = fadd float %278, %277 > %280 = fmul float 
%45, %266 > %281 = fadd float %279, %280 > %282 = fmul float %271, %271 > %283 = fmul float %276, %276 > %284 = fadd float %283, %282 > %285 = fmul float %281, %281 > %286 = fadd float %284, %285 > %287 = call float @llvm.AMDGPU.rsq.clamped.f32(float %286) > %288 = fmul float %287, %271 > %289 = fmul float %287, %276 > %290 = fmul float %287, %281 > %291 = bitcast float %165 to i32 > %292 = bitcast float %166 to i32 > %293 = insertelement <4 x i32> undef, i32 %291, i32 0 > %294 = insertelement <4 x i32> %293, i32 %292, i32 1 > %295 = insertelement <4 x i32> %294, i32 0, i32 2 > %296 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %295, <8 x i32> %109, <4 x i32> %116, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %297 = extractelement <4 x float> %296, i32 0 > %298 = extractelement <4 x float> %296, i32 1 > %299 = call float @llvm.fabs.f32(float %288) > %300 = call float @llvm.fabs.f32(float %289) > %301 = call float @llvm.fabs.f32(float %290) > %302 = fmul float %299, %288 > %303 = fmul float %300, %289 > %304 = fmul float %301, %290 > %305 = fcmp ole float %302, 0.000000e+00 > %306 = fcmp ole float %303, 0.000000e+00 > %307 = fcmp ole float %304, 0.000000e+00 > %308 = select i1 %305, float 1.000000e+00, float 0.000000e+00 > %309 = select i1 %306, float 1.000000e+00, float 0.000000e+00 > %310 = select i1 %307, float 1.000000e+00, float 0.000000e+00 > %. = select i1 %305, float 0.000000e+00, float 1.000000e+00 > %temp60.0 = select i1 %306, float 0.000000e+00, float 1.000000e+00 > %.99 = select i1 %307, float 0.000000e+00, float 1.000000e+00 > %311 = fmul float %242, %. > %312 = fmul float %243, %temp60.0 > %313 = fmul float %244, %.99 > %314 = call float @llvm.fma.f32(float %253, float %308, float %311) > %315 = call float @llvm.fma.f32(float %254, float %309, float %312) > %316 = call float @llvm.fma.f32(float %255, float %310, float %313) > %317 = call float @llvm.fabs.f32(float %302) > %318 = call float @llvm.fabs.f32(float %303) > %319 = call float @llvm.fabs.f32(float %304) > %320 = fmul float %314, %317 > %321 = fmul float %315, %318 > %322 = fadd float %321, %320 > %323 = fmul float %316, %319 > %324 = fadd float %322, %323 > %325 = fmul float %298, %324 > %326 = fmul float %231, %325 > %327 = fmul float %232, %325 > %328 = fmul float %233, %325 > %329 = bitcast float %165 to i32 > %330 = bitcast float %166 to i32 > %331 = insertelement <2 x i32> undef, i32 %329, i32 0 > %332 = insertelement <2 x i32> %331, i32 %330, i32 1 > %333 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %332, <8 x i32> %118, <4 x i32> %125, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %334 = extractelement <4 x float> %333, i32 0 > %335 = extractelement <4 x float> %333, i32 1 > %336 = extractelement <4 x float> %333, i32 2 > %337 = extractelement <4 x float> %333, i32 3 > %338 = bitcast float %165 to i32 > %339 = bitcast float %166 to i32 > %340 = insertelement <2 x i32> undef, i32 %338, i32 0 > %341 = insertelement <2 x i32> %340, i32 %339, i32 1 > %342 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %341, <8 x i32> %127, <4 x i32> %134, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %343 = extractelement <4 x float> %342, i32 0 > %344 = extractelement <4 x float> %342, i32 1 > %345 = extractelement <4 x float> %342, i32 2 > %346 = extractelement <4 x float> %342, i32 3 > %347 = fcmp ogt float %346, 0.000000e+00 > br i1 %347, label %IF79, label %ENDIF78 > >IF79: ; preds = %main_body > %348 = extractelement <4 x float> %260, i32 3 > %349 = 
call float @llvm.minnum.f32(float %346, float 5.000000e-01) > %350 = fcmp olt float %348, 0x3FE0505060000000 > %351 = fmul float %326, %334 > %352 = fmul float %327, %335 > %353 = fmul float %328, %336 > %354 = fadd float %351, %351 > %355 = fadd float %352, %352 > %356 = fadd float %353, %353 > %357 = call float @llvm.maxnum.f32(float %336, float %335) > %358 = call float @llvm.maxnum.f32(float %357, float %334) > %359 = fcmp oeq float %358, 0.000000e+00 > %360 = fcmp oeq float %358, 0.000000e+00 > %361 = fcmp oeq float %358, 0.000000e+00 > %362 = fcmp ogt float %334, 0.000000e+00 > %363 = select i1 %362, float 1.000000e+00, float %334 > %364 = fcmp oge float %363, 0.000000e+00 > %365 = fcmp ogt float %335, 0.000000e+00 > %366 = select i1 %365, float 1.000000e+00, float %335 > %367 = fcmp oge float %366, 0.000000e+00 > %368 = fcmp ogt float %336, 0.000000e+00 > %369 = select i1 %368, float 1.000000e+00, float %336 > %370 = fcmp oge float %369, 0.000000e+00 > %.op107 = fmul float %363, 0x4600000000000000 > %371 = select i1 %364, float %.op107, float 0xC600000000000000 > %.op108 = fmul float %366, 0x4600000000000000 > %372 = select i1 %367, float %.op108, float 0xC600000000000000 > %.op109 = fmul float %369, 0x4600000000000000 > %373 = select i1 %370, float %.op109, float 0xC600000000000000 > %374 = fdiv float 1.000000e+00, %358 > %375 = fmul float %334, %374 > %376 = fmul float %335, %374 > %377 = fmul float %336, %374 > %378 = select i1 %359, float %371, float %375 > %379 = select i1 %360, float %372, float %376 > %380 = select i1 %361, float %373, float %377 > %381 = call float @llvm.AMDGPU.clamp.(float %378, float 0.000000e+00, float 1.000000e+00) > %382 = call float @llvm.AMDGPU.clamp.(float %379, float 0.000000e+00, float 1.000000e+00) > %383 = call float @llvm.AMDGPU.clamp.(float %380, float 0.000000e+00, float 1.000000e+00) > %384 = call float @llvm.AMDGPU.clamp.(float %326, float 0.000000e+00, float 1.000000e+00) > %385 = call float @llvm.AMDGPU.clamp.(float %327, float 0.000000e+00, float 1.000000e+00) > %386 = call float @llvm.AMDGPU.clamp.(float %328, float 0.000000e+00, float 1.000000e+00) > %387 = fmul float %381, %381 > %388 = fmul float %382, %382 > %389 = fmul float %383, %383 > %390 = fmul float %387, %384 > %391 = fmul float %388, %385 > %392 = fmul float %389, %386 > %393 = fmul float %390, 0x3FC3333340000000 > %394 = fmul float %391, 0x3FC3333340000000 > %395 = fmul float %392, 0x3FC3333340000000 > %.100 = select i1 %350, float %354, float %393 > %temp64.0 = select i1 %350, float %355, float %394 > %.102 = select i1 %350, float %356, float %395 > %396 = fmul float %349, %.100 > %397 = fmul float %349, %temp64.0 > %398 = fmul float %349, %.102 > br label %ENDIF78 > >ENDIF78: ; preds = %main_body, %IF79 > %temp3.0 = phi float [ %398, %IF79 ], [ 0.000000e+00, %main_body ] > %temp1.0 = phi float [ %397, %IF79 ], [ 0.000000e+00, %main_body ] > %temp.0 = phi float [ %396, %IF79 ], [ 0.000000e+00, %main_body ] > %399 = fmul float %228, %50 > %400 = fmul float %229, %51 > %401 = fmul float %230, %52 > %402 = fmul float %149, %149 > %403 = fmul float %150, %150 > %404 = fadd float %403, %402 > %405 = fmul float %151, %151 > %406 = fadd float %404, %405 > %407 = call float @llvm.AMDGPU.rsq.clamped.f32(float %406) > %408 = fmul float %407, %149 > %409 = fmul float %407, %150 > %410 = fmul float %407, %151 > %411 = fmul float %408, %288 > %412 = fmul float %409, %289 > %413 = fadd float %412, %411 > %414 = fmul float %410, %290 > %415 = fadd float %413, %414 > %416 = fmul float 
%288, %415 > %417 = fmul float %289, %415 > %418 = fmul float %290, %415 > %419 = fsub float -0.000000e+00, %416 > %420 = call float @llvm.fma.f32(float %419, float 2.000000e+00, float %408) > %421 = fsub float -0.000000e+00, %417 > %422 = call float @llvm.fma.f32(float %421, float 2.000000e+00, float %409) > %423 = fsub float -0.000000e+00, %418 > %424 = call float @llvm.fma.f32(float %423, float 2.000000e+00, float %410) > %425 = fsub float -0.000000e+00, %420 > %426 = fsub float -0.000000e+00, %422 > %427 = fsub float -0.000000e+00, %424 > %428 = call float @llvm.fabs.f32(float %343) > %429 = call float @llvm.sqrt.f32(float %428) > %430 = fsub float -0.000000e+00, %429 > %431 = call float @llvm.fma.f32(float %430, float 8.000000e+00, float %53) > %432 = insertelement <4 x float> undef, float %425, i32 0 > %433 = insertelement <4 x float> %432, float %426, i32 1 > %434 = insertelement <4 x float> %433, float %427, i32 2 > %435 = insertelement <4 x float> %434, float %431, i32 3 > %436 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %435) > %437 = extractelement <4 x float> %436, i32 0 > %438 = extractelement <4 x float> %436, i32 1 > %439 = extractelement <4 x float> %436, i32 2 > %440 = call float @llvm.fabs.f32(float %439) > %441 = fdiv float 1.000000e+00, %440 > %442 = fmul float %437, %441 > %443 = fadd float %442, 1.500000e+00 > %444 = fmul float %438, %441 > %445 = fadd float %444, 1.500000e+00 > %446 = bitcast float %445 to i32 > %447 = bitcast float %443 to i32 > %bc = bitcast <4 x float> %436 to <4 x i32> > %448 = extractelement <4 x i32> %bc, i32 3 > %449 = bitcast float %431 to i32 > %450 = insertelement <4 x i32> undef, i32 %446, i32 0 > %451 = insertelement <4 x i32> %450, i32 %447, i32 1 > %452 = insertelement <4 x i32> %451, i32 %448, i32 2 > %453 = insertelement <4 x i32> %452, i32 %449, i32 3 > %454 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %453, <8 x i32> %136, <4 x i32> %143, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %455 = extractelement <4 x float> %454, i32 0 > %456 = extractelement <4 x float> %454, i32 1 > %457 = extractelement <4 x float> %454, i32 2 > %458 = extractelement <4 x float> %454, i32 3 > %459 = fmul float %458, %455 > %460 = fmul float %458, %456 > %461 = fmul float %458, %457 > %462 = fsub float -0.000000e+00, %455 > %463 = call float @llvm.fma.f32(float %459, float 1.000000e+01, float %462) > %464 = fsub float -0.000000e+00, %456 > %465 = call float @llvm.fma.f32(float %460, float 1.000000e+01, float %464) > %466 = fsub float -0.000000e+00, %457 > %467 = call float @llvm.fma.f32(float %461, float 1.000000e+01, float %466) > %468 = call float @llvm.fma.f32(float %343, float %463, float %455) > %469 = call float @llvm.fma.f32(float %343, float %465, float %456) > %470 = call float @llvm.fma.f32(float %343, float %467, float %457) > %471 = fsub float 1.000000e+00, %345 > %472 = fmul float %420, %408 > %473 = fmul float %422, %409 > %474 = fadd float %473, %472 > %475 = fmul float %424, %410 > %476 = fadd float %474, %475 > %477 = call float @llvm.AMDGPU.clamp.(float %476, float 0.000000e+00, float 1.000000e+00) > %478 = fmul float %477, %477 > %479 = fmul float %478, %478 > %480 = fmul float %477, %479 > %481 = call float @llvm.fma.f32(float %471, float %480, float %345) > %482 = call float @llvm.fabs.f32(float %420) > %483 = call float @llvm.fabs.f32(float %422) > %484 = call float @llvm.fabs.f32(float %424) > %485 = fmul float %482, %420 > %486 = fmul float %483, %422 > %487 = fmul float %484, %424 > %488 = fcmp oge 
float %485, 0.000000e+00 > %489 = fcmp oge float %486, 0.000000e+00 > %490 = fcmp oge float %487, 0.000000e+00 > %491 = select i1 %488, float 1.000000e+00, float 0.000000e+00 > %492 = select i1 %489, float 1.000000e+00, float 0.000000e+00 > %493 = select i1 %490, float 1.000000e+00, float 0.000000e+00 > %.101 = select i1 %488, float 0.000000e+00, float 1.000000e+00 > %temp68.0 = select i1 %489, float 0.000000e+00, float 1.000000e+00 > %.103 = select i1 %490, float 0.000000e+00, float 1.000000e+00 > %494 = fmul float %242, %.101 > %495 = fmul float %243, %temp68.0 > %496 = fmul float %244, %.103 > %497 = call float @llvm.fma.f32(float %253, float %491, float %494) > %498 = call float @llvm.fma.f32(float %254, float %492, float %495) > %499 = call float @llvm.fma.f32(float %255, float %493, float %496) > %500 = call float @llvm.fabs.f32(float %420) > %501 = call float @llvm.fabs.f32(float %422) > %502 = call float @llvm.fabs.f32(float %424) > %503 = fmul float %497, %500 > %504 = fmul float %498, %501 > %505 = fadd float %504, %503 > %506 = fmul float %499, %502 > %507 = fadd float %505, %506 > %508 = fadd float %343, 0x3FA99999A0000000 > %509 = call float @llvm.AMDGPU.clamp.(float %508, float 0.000000e+00, float 1.000000e+00) > %510 = fmul float %509, %507 > %511 = fmul float %297, %510 > %512 = fmul float %481, %511 > %513 = fmul float %399, %512 > %514 = fmul float %400, %512 > %515 = fmul float %401, %512 > %516 = fmul float %513, %468 > %517 = fmul float %514, %469 > %518 = fmul float %515, %470 > %519 = fmul float %344, %516 > %520 = fmul float %344, %517 > %521 = fmul float %344, %518 > %522 = call float @llvm.fma.f32(float %326, float %334, float %519) > %523 = call float @llvm.fma.f32(float %327, float %335, float %520) > %524 = call float @llvm.fma.f32(float %328, float %336, float %521) > %525 = fadd float %temp.0, %522 > %526 = fadd float %temp1.0, %523 > %527 = fadd float %temp3.0, %524 > %528 = fmul float %337, %337 > %529 = fmul float %525, %528 > %530 = fmul float %526, %528 > %531 = fmul float %527, %528 > %532 = fmul float %529, %189 > %533 = fmul float %530, %189 > %534 = fmul float %531, %189 > %535 = bitcast float %5 to i32 > %536 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %535, 10 > %537 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %536, float %532, 11 > %538 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %537, float %533, 12 > %539 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %538, float %534, 13 > %540 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %539, float 1.000000e+00, 14 > %541 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %540, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> 
%541 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..57] >DCL CONST[2][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 176, 336, 848} >IMM[1] FLT32 { 0.0000, 1.0000, -0.5000, 0.5000} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {864, 880, 736, 752} >IMM[4] UINT32 {768, 784, 912, 1} >IMM[5] UINT32 {384, 0, 0, 0} >IMM[6] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xy, CONST[1][11].wwww, CONST[1][21].xyyy > 1: ADD TEMP[1].xyz, IN[0].xyzz, IMM[1].xxyy > 2: MOV TEMP[0].z, CONST[1][11].wwww > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 4: MUL TEMP[1].xy, CONST[1][21].zzzz, CONST[1][21].xyyy > 5: MOV TEMP[1].z, -CONST[1][21].zzzz > 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[0].xyzz > 7: FSLT TEMP[2].x, IN[0].zzzz, IMM[1].zzzz > 8: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 9: INEG TEMP[2].x, TEMP[2].xxxx > 10: USNE TEMP[3].x, TEMP[2].xxxx, IMM[0].xxxx > 11: UIF TEMP[3].xxxx :0 > 12: MOV TEMP[3].x, TEMP[1].xxxx > 13: ELSE :0 > 14: MOV TEMP[3].x, TEMP[0].xxxx > 15: ENDIF > 16: MOV TEMP[3].x, TEMP[3].xxxx > 17: USNE TEMP[4].x, TEMP[2].xxxx, IMM[0].xxxx > 18: UIF TEMP[4].xxxx :0 > 19: MOV TEMP[4].x, TEMP[1].yyyy > 20: ELSE :0 > 21: MOV TEMP[4].x, TEMP[0].yyyy > 22: ENDIF > 23: MOV TEMP[3].y, TEMP[4].xxxx > 24: USNE TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx > 25: UIF TEMP[2].xxxx :0 > 26: MOV TEMP[2].x, TEMP[1].zzzz > 27: ELSE :0 > 28: MOV TEMP[2].x, TEMP[0].zzzz > 29: ENDIF > 30: MOV TEMP[3].z, TEMP[2].xxxx > 31: FSLT TEMP[2].x, IMM[1].wwww, IN[0].zzzz > 32: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 33: INEG TEMP[2].x, TEMP[2].xxxx > 34: MUL TEMP[1].xyz, IN[0].xyzz, CONST[1][11].xyzz > 35: USNE TEMP[4].x, TEMP[2].xxxx, IMM[0].xxxx > 36: UIF TEMP[4].xxxx :0 > 37: MOV TEMP[4].x, TEMP[1].xxxx > 38: ELSE :0 > 39: MOV TEMP[4].x, TEMP[3].xxxx > 40: ENDIF > 41: MOV TEMP[4].x, TEMP[4].xxxx > 42: USNE TEMP[5].x, TEMP[2].xxxx, IMM[0].xxxx > 43: UIF TEMP[5].xxxx :0 > 44: MOV TEMP[5].x, TEMP[1].yyyy > 45: ELSE :0 > 46: MOV TEMP[5].x, 
TEMP[3].yyyy > 47: ENDIF > 48: MOV TEMP[4].y, TEMP[5].xxxx > 49: USNE TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx > 50: UIF TEMP[2].xxxx :0 > 51: MOV TEMP[2].x, TEMP[1].zzzz > 52: ELSE :0 > 53: MOV TEMP[2].x, TEMP[3].zzzz > 54: ENDIF > 55: MOV TEMP[4].z, TEMP[2].xxxx > 56: MOV TEMP[0].xyz, TEMP[4].xyzx > 57: MOV TEMP[0].w, IMM[1].yyyy > 58: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 59: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 60: MOV TEMP[1].y, TEMP[2].xxxx > 61: DP4 TEMP[2].x, CONST[1][55], TEMP[0] > 62: MOV TEMP[1].z, TEMP[2].xxxx > 63: MOV TEMP[1].w, IMM[1].yyyy > 64: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 65: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 66: MOV TEMP[0].y, TEMP[2].xxxx > 67: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 68: MOV TEMP[0].z, TEMP[2].xxxx > 69: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 70: MOV TEMP[0].w, TEMP[2].xxxx > 71: ADD TEMP[3].xyz, -TEMP[1].xyzz, CONST[1][57].xyzz > 72: MOV TEMP[4], TEMP[0] > 73: MOV TEMP[5].zw, TEMP[0].wwzw > 74: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 75: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 76: MUL TEMP[6].xy, IMM[6].xyyy, CONST[2][24].xyyy > 77: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[6].xyyy, TEMP[1].xyyy > 78: MOV TEMP[5].xy, TEMP[0].xyxx > 79: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[2][21].zwww, TEMP[2].xyyy > 80: MOV OUT[2], TEMP[0] > 81: MOV OUT[1], TEMP[5] > 82: MOV OUT[3], TEMP[3] > 83: MOV OUT[0], TEMP[4] > 84: END >radeonsi: Compiling shader 77 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 188) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %40 = 
call float @llvm.SI.load.const(<16 x i8> %15, i32 852) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %50 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %51 = call float @llvm.SI.load.const(<16 x i8> %15, i32 912) > %52 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %53 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call float @llvm.SI.load.const(<16 x i8> %55, i32 336) > %57 = call float @llvm.SI.load.const(<16 x i8> %55, i32 340) > %58 = call float @llvm.SI.load.const(<16 x i8> %55, i32 344) > %59 = call float @llvm.SI.load.const(<16 x i8> %55, i32 348) > %60 = call float @llvm.SI.load.const(<16 x i8> %55, i32 384) > %61 = call float @llvm.SI.load.const(<16 x i8> %55, i32 388) > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %13) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = fmul float %19, %20 > %69 = fmul float %19, %21 > %70 = fadd float %65, 0.000000e+00 > %71 = fadd float %66, 0.000000e+00 > %72 = fadd float %67, 1.000000e+00 > %73 = fmul float %68, %70 > %74 = fmul float %69, %71 > %75 = fmul float %19, %72 > %76 = fmul float %22, %20 > %77 = fmul float %22, %21 > %78 = fmul float %76, %65 > %79 = fmul float %77, %66 > %80 = fmul float %22, %67 > %81 = fsub float -0.000000e+00, %80 > %82 = fcmp olt float %67, -5.000000e-01 > %. = select i1 %82, float %78, float %73 > %temp16.0 = select i1 %82, float %79, float %74 > %.43 = select i1 %82, float %81, float %75 > %83 = fcmp ogt float %67, 5.000000e-01 > %84 = fmul float %65, %16 > %85 = fmul float %66, %17 > %86 = fmul float %67, %18 > %temp16.1 = select i1 %83, float %84, float %. 
> %.temp16.0 = select i1 %83, float %85, float %temp16.0 > %temp8.1 = select i1 %83, float %86, float %.43 > %87 = fmul float %39, %temp16.1 > %88 = fmul float %40, %.temp16.0 > %89 = fadd float %87, %88 > %90 = fmul float %41, %temp8.1 > %91 = fadd float %89, %90 > %92 = fadd float %91, %42 > %93 = fmul float %43, %temp16.1 > %94 = fmul float %44, %.temp16.0 > %95 = fadd float %93, %94 > %96 = fmul float %45, %temp8.1 > %97 = fadd float %95, %96 > %98 = fadd float %97, %46 > %99 = fmul float %47, %temp16.1 > %100 = fmul float %48, %.temp16.0 > %101 = fadd float %99, %100 > %102 = fmul float %49, %temp8.1 > %103 = fadd float %101, %102 > %104 = fadd float %103, %50 > %105 = fmul float %23, %92 > %106 = fmul float %24, %98 > %107 = fadd float %105, %106 > %108 = fmul float %25, %104 > %109 = fadd float %107, %108 > %110 = fadd float %109, %26 > %111 = fmul float %27, %92 > %112 = fmul float %28, %98 > %113 = fadd float %111, %112 > %114 = fmul float %29, %104 > %115 = fadd float %113, %114 > %116 = fadd float %115, %30 > %117 = fmul float %31, %92 > %118 = fmul float %32, %98 > %119 = fadd float %117, %118 > %120 = fmul float %33, %104 > %121 = fadd float %119, %120 > %122 = fadd float %121, %34 > %123 = fmul float %35, %92 > %124 = fmul float %36, %98 > %125 = fadd float %123, %124 > %126 = fmul float %37, %104 > %127 = fadd float %125, %126 > %128 = fadd float %127, %38 > %129 = fsub float %51, %92 > %130 = fsub float %52, %98 > %131 = fsub float %53, %104 > %132 = fmul float %128, %60 > %133 = fmul float %128, %61 > %134 = fmul float %128, %56 > %135 = fmul float %128, %57 > %136 = fsub float -0.000000e+00, %61 > %137 = call float @llvm.fma.f32(float %110, float %60, float %132) > %138 = call float @llvm.fma.f32(float %116, float %136, float %133) > %139 = call float @llvm.fma.f32(float %137, float %58, float %134) > %140 = call float @llvm.fma.f32(float %138, float %59, float %135) > %141 = bitcast i32 %11 to float > %142 = insertvalue <{ float, float, float }> undef, float %141, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %137, float %138, float %122, float %128) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %139, float %140, float %122, float %128) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %130, float %131, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %116, float %122, float %128) > ret <{ float, float, float }> %142 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT 
>DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..18] >DCL TEMP[0..16], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {1, 272, 0, 64} >IMM[2] UINT32 {80, 96, 112, 128} >IMM[3] UINT32 {160, 256, 288, 0} >IMM[4] FLT32 { -1.0000, 4096.0000, 0.0040, 2.0040} >IMM[5] FLT32 { 0.1250, 0.5098, -0.5000, 0.5000} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.2500, 1.0000, 0.1500, 0.3330} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: ADD TEMP[1].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 17: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[2][0].xxxx > 18: DP3 TEMP[2].x, CONST[1][4].xyzz, TEMP[1].xyzz > 19: DP3 TEMP[4].x, CONST[1][5].xyzz, TEMP[1].xyzz > 20: MOV TEMP[2].y, TEMP[4].xxxx > 21: DP3 TEMP[4].x, CONST[1][6].xyzz, TEMP[1].xyzz > 22: MOV TEMP[2].z, TEMP[4].xxxx > 23: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 24: MIN TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].zzzz > 26: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 27: MOV TEMP[3].w, IMM[0].zzzz > 28: DP4 TEMP[1].x, CONST[2][7], TEMP[3] > 29: DP4 TEMP[6].x, CONST[2][8], TEMP[3] > 30: MOV TEMP[1].y, TEMP[6].xxxx > 31: DP4 TEMP[6].x, CONST[2][10], TEMP[3] > 32: FSEQ TEMP[7].xy, TEMP[6].xxxx, IMM[0].xxxx > 33: SSG TEMP[8].xy, TEMP[1].xyyy > 34: MUL TEMP[8].xy, IMM[0].yyyy, TEMP[8].xyyy > 35: RCP TEMP[6].xy, TEMP[6].xxxx > 36: MUL TEMP[6].xy, TEMP[1].xyyy, TEMP[6].xyyy > 37: UCMP TEMP[3].xy, TEMP[7].xyyy, TEMP[8].xyyy, TEMP[6].xyyy > 38: MOV TEMP[6].xy, TEMP[3].xyyy > 39: MOV TEMP[6].w, IMM[0].xxxx > 40: TXL TEMP[6], TEMP[6], SAMP[1], 2D > 41: MUL TEMP[3].xyz, TEMP[6].xyzz, CONST[2][16].xyzz > 42: MOV TEMP[7].xy, TEMP[0].xyyy > 43: TEX TEMP[7], TEMP[7], SAMP[2], 2D > 44: MOV TEMP[1].xyz, TEMP[7] > 45: MUL TEMP[8].xyz, TEMP[3].xyzz, TEMP[7].xyzz > 46: DP3 TEMP[9].x, IN[2].xyzz, IN[2].xyzz > 47: RSQ TEMP[9].x, TEMP[9].xxxx > 48: MOV TEMP[10].xy, TEMP[0].xyyy > 49: TEX TEMP[10], TEMP[10], SAMP[3], 2D > 50: FMA TEMP[11].xyz, TEMP[10].xyzz, IMM[0].wwww, IMM[4].xxxx > 51: DP3 TEMP[12].x, TEMP[11].xyzz, TEMP[11].xyzz > 52: RSQ TEMP[12].x, TEMP[12].xxxx > 53: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz > 54: RSQ TEMP[4].x, TEMP[4].xxxx > 55: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz > 56: MOV TEMP[4].xy, TEMP[0].xyyy > 57: TEX TEMP[4], TEMP[4], SAMP[4], 2D > 58: MOV TEMP[12], TEMP[4].wxyz > 59: MUL TEMP[6].xyz, TEMP[6].wwww, CONST[2][18].xyzz > 60: MUL TEMP[13].x, TEMP[4].xxxx, TEMP[4].xxxx > 61: MOV TEMP[14].xy, TEMP[0].xyyy > 62: MOV TEMP[14].w, IMM[0].xxxx > 63: TXL TEMP[14].xy, TEMP[14], SAMP[5], 2D > 64: FMA TEMP[9].xyz, IN[2].xyzz, TEMP[9].xxxx, TEMP[2].xyzz > 65: DP3 TEMP[15].x, TEMP[9].xyzz, TEMP[9].xyzz > 66: RSQ TEMP[15].x, TEMP[15].xxxx > 67: MUL TEMP[9].xyz, TEMP[15].xxxx, TEMP[9].xyzz > 68: DP3 TEMP[15].x, TEMP[11].xyzz, TEMP[9].xyzz > 69: MOV_SAT TEMP[15].x, 
TEMP[15].xxxx > 70: FMA TEMP[13].xy, TEMP[13].xxxx, IMM[4].yyyy, IMM[4].zwww > 71: MUL TEMP[16].x, TEMP[13].yyyy, IMM[5].xxxx > 72: LG2 TEMP[15].x, TEMP[15].xxxx > 73: MUL TEMP[13].x, TEMP[15].xxxx, TEMP[13].xxxx > 74: EX2 TEMP[13].x, TEMP[13].xxxx > 75: MUL TEMP[13].x, TEMP[16].xxxx, TEMP[13].xxxx > 76: ADD TEMP[15].x, -TEMP[4].zzzz, IMM[0].zzzz > 77: DP3 TEMP[9].x, TEMP[2].xyzz, TEMP[9].xyzz > 78: MOV_SAT TEMP[9].x, TEMP[9].xxxx > 79: ADD TEMP[9].x, -TEMP[9].xxxx, IMM[0].zzzz > 80: MUL TEMP[16].x, TEMP[9].xxxx, TEMP[9].xxxx > 81: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[16].xxxx > 82: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[16].xxxx > 83: FMA TEMP[9].x, TEMP[15].xxxx, TEMP[9].xxxx, TEMP[4].zzzz > 84: MUL TEMP[9].x, TEMP[13].xxxx, TEMP[9].xxxx > 85: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[14].xxxx > 86: MUL TEMP[6].xyz, TEMP[4].yyyy, TEMP[6].xyzz > 87: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[2].xyzz > 88: MOV TEMP[0].x, TEMP[11].xxxx > 89: ADD TEMP[13].x, TEMP[7].wwww, TEMP[11].xxxx > 90: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].xxxx > 91: FSNE TEMP[15].x, TEMP[7].wwww, IMM[0].xxxx > 92: UIF TEMP[15].xxxx :0 > 93: RCP TEMP[15].x, TEMP[7].wwww > 94: MUL TEMP[15].x, TEMP[13].xxxx, TEMP[15].xxxx > 95: ELSE :0 > 96: SSG TEMP[13].x, TEMP[13].xxxx > 97: MUL TEMP[15].x, IMM[0].yyyy, TEMP[13].xxxx > 98: ENDIF > 99: MOV_SAT TEMP[13].x, TEMP[15].xxxx >100: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[8].xyzz >101: MUL TEMP[2].xyz, TEMP[13].xxxx, TEMP[9].xyzz >102: FSLT TEMP[9].x, IMM[0].xxxx, TEMP[4].wwww >103: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >104: INEG TEMP[9].x, TEMP[9].xxxx >105: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz >106: UIF TEMP[9].xxxx :0 >107: FSLT TEMP[9].x, TEMP[10].wwww, IMM[5].yyyy >108: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >109: INEG TEMP[9].x, TEMP[9].xxxx >110: ADD TEMP[10].xyz, TEMP[8].xyzz, TEMP[8].xyzz >111: MOV TEMP[12].yzw, TEMP[10].yxyz >112: ADD TEMP[13].x, TEMP[4].wwww, IMM[5].zzzz >113: MOV_SAT TEMP[13].x, TEMP[13].xxxx >114: MUL TEMP[8].xyz, TEMP[13].xxxx, TEMP[10].xyzz >115: ADD TEMP[10].xy, -TEMP[11].xxxx, IMM[7].xyyy >116: MOV_SAT TEMP[10].xy, TEMP[10].xyyy >117: FMA TEMP[8].xyz, TEMP[8].xyzz, TEMP[10].xxxx, TEMP[2].xyzz >118: MIN TEMP[6].x, TEMP[4].wwww, IMM[5].wwww >119: MAX TEMP[4].x, TEMP[7].zzzz, TEMP[7].yyyy >120: MAX TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx >121: FSEQ TEMP[13].xyz, TEMP[4].xxxx, IMM[0].xxxx >122: SSG TEMP[15].xyz, TEMP[7].xyzz >123: MUL TEMP[15].xyz, IMM[0].yyyy, TEMP[15].xyzz >124: RCP TEMP[4].xyz, TEMP[4].xxxx >125: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xyzz >126: UCMP TEMP[4].xyz, TEMP[13].xyzz, TEMP[15].xyzz, TEMP[4].xyzz >127: MOV_SAT TEMP[4].xyz, TEMP[4].xyzz >128: MUL TEMP[1].xyz, TEMP[4].xyzz, TEMP[4].xyzz >129: MOV_SAT TEMP[4].xyz, TEMP[3].xyzz >130: MUL TEMP[3].xyz, TEMP[4].xyzz, TEMP[1].xyzz >131: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[7].zzzz >132: MOV TEMP[6].yzw, TEMP[3].yxyz >133: USNE TEMP[3].x, TEMP[9].xxxx, IMM[1].zzzz >134: UIF TEMP[3].xxxx :0 >135: MOV TEMP[3].x, TEMP[8].xxxx >136: ELSE :0 >137: MOV TEMP[3].x, TEMP[2].xxxx >138: ENDIF >139: MOV TEMP[3].x, TEMP[3].xxxx >140: USNE TEMP[4].x, TEMP[9].xxxx, IMM[1].zzzz >141: UIF TEMP[4].xxxx :0 >142: MOV TEMP[4].x, TEMP[8].yyyy >143: ELSE :0 >144: MOV TEMP[4].x, TEMP[2].yyyy >145: ENDIF >146: MOV TEMP[3].y, TEMP[4].xxxx >147: USNE TEMP[4].x, TEMP[9].xxxx, IMM[1].zzzz >148: UIF TEMP[4].xxxx :0 >149: MOV TEMP[4].x, TEMP[8].zzzz >150: ELSE :0 >151: MOV TEMP[4].x, TEMP[2].zzzz >152: ENDIF >153: MOV TEMP[3].z, TEMP[4].xxxx >154: USNE TEMP[4].x, TEMP[9].xxxx, IMM[1].zzzz >155: UIF TEMP[4].xxxx 
:0 >156: MOV TEMP[4].x, TEMP[12].xxxx >157: ELSE :0 >158: MOV TEMP[4].x, TEMP[6].xxxx >159: ENDIF >160: MOV TEMP[4].x, TEMP[4].xxxx >161: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >162: UIF TEMP[7].xxxx :0 >163: MOV TEMP[7].x, TEMP[12].yyyy >164: ELSE :0 >165: MOV TEMP[7].x, TEMP[6].yyyy >166: ENDIF >167: MOV TEMP[4].y, TEMP[7].xxxx >168: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >169: UIF TEMP[7].xxxx :0 >170: MOV TEMP[7].x, TEMP[12].zzzz >171: ELSE :0 >172: MOV TEMP[7].x, TEMP[6].zzzz >173: ENDIF >174: MOV TEMP[4].z, TEMP[7].xxxx >175: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >176: UIF TEMP[7].xxxx :0 >177: MOV TEMP[7].x, TEMP[12].wwww >178: ELSE :0 >179: MOV TEMP[7].x, TEMP[6].wwww >180: ENDIF >181: MOV TEMP[4].w, TEMP[7].xxxx >182: MIN TEMP[6].x, TEMP[4].xxxx, IMM[5].wwww >183: MUL TEMP[1].xyz, TEMP[6].xxxx, TEMP[4].yzww >184: MUL TEMP[1].xyz, TEMP[10].yyyy, TEMP[1].xyzz >185: ADD TEMP[4].x, TEMP[11].xxxx, IMM[7].xxxx >186: MOV_SAT TEMP[0].x, TEMP[4].xxxx >187: FMA TEMP[2].xyz, TEMP[1].xyzz, TEMP[0].xxxx, TEMP[3].xyzz >188: ENDIF >189: MUL TEMP[0].xyz, TEMP[5].xxxx, TEMP[2].xyzz >190: ADD TEMP[1].x, TEMP[14].yyyy, IMM[7].wwww >191: MOV_SAT TEMP[1].x, TEMP[1].xxxx >192: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >193: MOV TEMP[0].w, IMM[0].zzzz >194: MOV OUT[0], TEMP[0] >195: END >radeonsi: Compiling shader 78 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 124) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 140) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 
164) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %52 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %53 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %54 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %55 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %56 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %57 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %58 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 > %60 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %61 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %60, i64 0, i64 3 > %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 > %63 = extractelement <8 x i32> %59, i32 7 > %64 = extractelement <4 x i32> %62, i32 0 > %65 = and i32 %64, %63 > %66 = insertelement <4 x i32> %62, i32 %65, i32 0 > %67 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 > %69 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %70 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %69, i64 0, i64 7 > %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0 > %72 = extractelement <8 x i32> %68, i32 7 > %73 = extractelement <4 x i32> %71, i32 0 > %74 = and i32 %73, %72 > %75 = insertelement <4 x i32> %71, i32 %74, i32 0 > %76 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 > %78 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %79 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %78, i64 0, i64 11 > %80 = load <4 x i32>, <4 x i32> addrspace(2)* %79, align 16, !tbaa !0 > %81 = extractelement <8 x i32> %77, i32 7 > %82 = extractelement <4 x i32> %80, i32 0 > %83 = and i32 %82, %81 > %84 = insertelement <4 x i32> %80, i32 %83, i32 0 > %85 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %86 = load <8 x i32>, <8 x i32> addrspace(2)* %85, align 32, !tbaa !0 > %87 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %88 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %87, i64 0, i64 15 > %89 = load <4 x i32>, <4 x i32> addrspace(2)* %88, align 16, !tbaa !0 > %90 = extractelement <8 x i32> %86, i32 7 > %91 = extractelement <4 x i32> %89, i32 0 > %92 = and i32 %91, %90 > %93 = insertelement <4 x i32> %89, i32 %92, i32 0 > %94 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %95 = load <8 x i32>, <8 x i32> addrspace(2)* %94, align 32, !tbaa !0 > %96 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %97 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %96, i64 0, i64 19 > %98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0 > %99 = extractelement <8 x i32> %95, i32 7 > %100 = extractelement <4 x i32> %98, i32 0 > %101 = and i32 %100, %99 > %102 = insertelement <4 x i32> %98, i32 %101, i32 0 > %103 = 
getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %104 = load <8 x i32>, <8 x i32> addrspace(2)* %103, align 32, !tbaa !0 > %105 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %106 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %105, i64 0, i64 23 > %107 = load <4 x i32>, <4 x i32> addrspace(2)* %106, align 16, !tbaa !0 > %108 = extractelement <8 x i32> %104, i32 7 > %109 = extractelement <4 x i32> %107, i32 0 > %110 = and i32 %109, %108 > %111 = insertelement <4 x i32> %107, i32 %110, i32 0 > %112 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %113 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %114 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %115 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %116 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %117 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %118 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %119 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %120 = fcmp oeq float %114, 0.000000e+00 > %121 = fcmp oeq float %114, 0.000000e+00 > %122 = fcmp ogt float %112, 0.000000e+00 > %123 = select i1 %122, float 1.000000e+00, float %112 > %124 = fcmp oge float %123, 0.000000e+00 > %125 = fcmp ogt float %113, 0.000000e+00 > %126 = select i1 %125, float 1.000000e+00, float %113 > %127 = fcmp oge float %126, 0.000000e+00 > %.op = fmul float %123, 0x4600000000000000 > %128 = select i1 %124, float %.op, float 0xC600000000000000 > %.op95 = fmul float %126, 0x4600000000000000 > %129 = select i1 %127, float %.op95, float 0xC600000000000000 > %130 = fdiv float 1.000000e+00, %114 > %131 = fmul float %112, %130 > %132 = fmul float %113, %130 > %133 = select i1 %120, float %128, float %131 > %134 = select i1 %121, float %129, float %132 > %135 = fcmp oeq float %114, 0.000000e+00 > %136 = fcmp oeq float %114, 0.000000e+00 > %137 = fcmp ogt float %115, 0.000000e+00 > %138 = select i1 %137, float 1.000000e+00, float %115 > %139 = fcmp oge float %138, 0.000000e+00 > %140 = fcmp ogt float %116, 0.000000e+00 > %141 = select i1 %140, float 1.000000e+00, float %116 > %142 = fcmp oge float %141, 0.000000e+00 > %.op96 = fmul float %138, 0x4600000000000000 > %143 = select i1 %139, float %.op96, float 0xC600000000000000 > %.op97 = fmul float %141, 0x4600000000000000 > %144 = select i1 %142, float %.op97, float 0xC600000000000000 > %145 = fdiv float 1.000000e+00, %114 > %146 = fmul float %115, %145 > %147 = fmul float %116, %145 > %148 = select i1 %135, float %143, float %146 > %149 = select i1 %136, float %144, float %147 > %150 = bitcast float %133 to i32 > %151 = bitcast float %134 to i32 > %152 = insertelement <2 x i32> undef, i32 %150, i32 0 > %153 = insertelement <2 x i32> %152, i32 %151, i32 1 > %154 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %153, <8 x i32> %59, <4 x i32> %66, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %155 = extractelement <4 x float> %154, i32 0 > %156 = fmul float %148, %155 > %157 = fmul float %149, %155 > %158 = fsub float %52, %156 > %159 = fsub float %53, %157 > %160 = fsub float %54, %155 > %161 = fmul float %158, %36 > %162 = fmul float %159, %36 > %163 = fmul float %160, %36 > %164 = fmul float %25, %161 > %165 = fmul float %26, %162 > %166 = fadd float %165, %164 > %167 = fmul float %27, %163 > %168 = fadd float %166, %167 > %169 = fmul float 
%28, %161 > %170 = fmul float %29, %162 > %171 = fadd float %170, %169 > %172 = fmul float %30, %163 > %173 = fadd float %171, %172 > %174 = fmul float %31, %161 > %175 = fmul float %32, %162 > %176 = fadd float %175, %174 > %177 = fmul float %33, %163 > %178 = fadd float %176, %177 > %179 = fmul float %168, %168 > %180 = fmul float %173, %173 > %181 = fadd float %180, %179 > %182 = fmul float %178, %178 > %183 = fadd float %181, %182 > %184 = call float @llvm.minnum.f32(float %183, float 1.000000e+00) > %185 = fsub float 1.000000e+00, %184 > %186 = fmul float %185, %185 > %187 = fmul float %37, %156 > %188 = fmul float %38, %157 > %189 = fadd float %187, %188 > %190 = fmul float %39, %155 > %191 = fadd float %189, %190 > %192 = fadd float %191, %40 > %193 = fmul float %41, %156 > %194 = fmul float %42, %157 > %195 = fadd float %193, %194 > %196 = fmul float %43, %155 > %197 = fadd float %195, %196 > %198 = fadd float %197, %44 > %199 = fmul float %45, %156 > %200 = fmul float %46, %157 > %201 = fadd float %199, %200 > %202 = fmul float %47, %155 > %203 = fadd float %201, %202 > %204 = fadd float %203, %48 > %205 = fcmp oeq float %204, 0.000000e+00 > %206 = fcmp oeq float %204, 0.000000e+00 > %207 = fcmp ogt float %192, 0.000000e+00 > %208 = select i1 %207, float 1.000000e+00, float %192 > %209 = fcmp oge float %208, 0.000000e+00 > %210 = fcmp ogt float %198, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %198 > %212 = fcmp oge float %211, 0.000000e+00 > %.op98 = fmul float %208, 0x4600000000000000 > %213 = select i1 %209, float %.op98, float 0xC600000000000000 > %.op99 = fmul float %211, 0x4600000000000000 > %214 = select i1 %212, float %.op99, float 0xC600000000000000 > %215 = fdiv float 1.000000e+00, %204 > %216 = fmul float %192, %215 > %217 = fmul float %198, %215 > %218 = select i1 %205, float %213, float %216 > %219 = select i1 %206, float %214, float %217 > %220 = bitcast float %218 to i32 > %221 = bitcast float %219 to i32 > %222 = insertelement <4 x i32> undef, i32 %220, i32 0 > %223 = insertelement <4 x i32> %222, i32 %221, i32 1 > %224 = insertelement <4 x i32> %223, i32 0, i32 2 > %225 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %224, <8 x i32> %68, <4 x i32> %75, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %226 = extractelement <4 x float> %225, i32 0 > %227 = extractelement <4 x float> %225, i32 1 > %228 = extractelement <4 x float> %225, i32 2 > %229 = extractelement <4 x float> %225, i32 3 > %230 = fmul float %226, %49 > %231 = fmul float %227, %50 > %232 = fmul float %228, %51 > %233 = bitcast float %133 to i32 > %234 = bitcast float %134 to i32 > %235 = insertelement <2 x i32> undef, i32 %233, i32 0 > %236 = insertelement <2 x i32> %235, i32 %234, i32 1 > %237 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %236, <8 x i32> %77, <4 x i32> %84, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %238 = extractelement <4 x float> %237, i32 0 > %239 = extractelement <4 x float> %237, i32 1 > %240 = extractelement <4 x float> %237, i32 2 > %241 = extractelement <4 x float> %237, i32 3 > %242 = fmul float %230, %238 > %243 = fmul float %231, %239 > %244 = fmul float %232, %240 > %245 = fmul float %117, %117 > %246 = fmul float %118, %118 > %247 = fadd float %246, %245 > %248 = fmul float %119, %119 > %249 = fadd float %247, %248 > %250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %249) > %251 = bitcast float %133 to i32 > %252 = bitcast float %134 to i32 > %253 = insertelement <2 x i32> undef, i32 %251, i32 0 > 
%254 = insertelement <2 x i32> %253, i32 %252, i32 1 > %255 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %254, <8 x i32> %86, <4 x i32> %93, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %256 = extractelement <4 x float> %255, i32 0 > %257 = extractelement <4 x float> %255, i32 1 > %258 = extractelement <4 x float> %255, i32 2 > %259 = extractelement <4 x float> %255, i32 3 > %260 = call float @llvm.fma.f32(float %256, float 2.000000e+00, float -1.000000e+00) > %261 = call float @llvm.fma.f32(float %257, float 2.000000e+00, float -1.000000e+00) > %262 = call float @llvm.fma.f32(float %258, float 2.000000e+00, float -1.000000e+00) > %263 = fmul float %260, %260 > %264 = fmul float %261, %261 > %265 = fadd float %264, %263 > %266 = fmul float %262, %262 > %267 = fadd float %265, %266 > %268 = call float @llvm.AMDGPU.rsq.clamped.f32(float %267) > %269 = fmul float %268, %260 > %270 = fmul float %268, %261 > %271 = fmul float %268, %262 > %272 = call float @llvm.AMDGPU.rsq.clamped.f32(float %183) > %273 = fmul float %272, %168 > %274 = fmul float %272, %173 > %275 = fmul float %272, %178 > %276 = bitcast float %133 to i32 > %277 = bitcast float %134 to i32 > %278 = insertelement <2 x i32> undef, i32 %276, i32 0 > %279 = insertelement <2 x i32> %278, i32 %277, i32 1 > %280 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %279, <8 x i32> %95, <4 x i32> %102, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %281 = extractelement <4 x float> %280, i32 0 > %282 = extractelement <4 x float> %280, i32 1 > %283 = extractelement <4 x float> %280, i32 2 > %284 = extractelement <4 x float> %280, i32 3 > %285 = fmul float %229, %55 > %286 = fmul float %229, %56 > %287 = fmul float %229, %57 > %288 = fmul float %281, %281 > %289 = bitcast float %133 to i32 > %290 = bitcast float %134 to i32 > %291 = insertelement <4 x i32> undef, i32 %289, i32 0 > %292 = insertelement <4 x i32> %291, i32 %290, i32 1 > %293 = insertelement <4 x i32> %292, i32 0, i32 2 > %294 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %293, <8 x i32> %104, <4 x i32> %111, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %295 = extractelement <4 x float> %294, i32 0 > %296 = extractelement <4 x float> %294, i32 1 > %297 = call float @llvm.fma.f32(float %117, float %250, float %273) > %298 = call float @llvm.fma.f32(float %118, float %250, float %274) > %299 = call float @llvm.fma.f32(float %119, float %250, float %275) > %300 = fmul float %297, %297 > %301 = fmul float %298, %298 > %302 = fadd float %301, %300 > %303 = fmul float %299, %299 > %304 = fadd float %302, %303 > %305 = call float @llvm.AMDGPU.rsq.clamped.f32(float %304) > %306 = fmul float %305, %297 > %307 = fmul float %305, %298 > %308 = fmul float %305, %299 > %309 = fmul float %269, %306 > %310 = fmul float %270, %307 > %311 = fadd float %310, %309 > %312 = fmul float %271, %308 > %313 = fadd float %311, %312 > %314 = call float @llvm.AMDGPU.clamp.(float %313, float 0.000000e+00, float 1.000000e+00) > %315 = call float @llvm.fma.f32(float %288, float 4.096000e+03, float 0x3F70624DE0000000) > %316 = call float @llvm.fma.f32(float %288, float 4.096000e+03, float 0x4000083120000000) > %317 = fmul float %316, 1.250000e-01 > %318 = call float @llvm.log2.f32(float %314) > %319 = fmul float %318, %315 > %320 = call float @llvm.exp2.f32(float %319) > %321 = fmul float %317, %320 > %322 = fsub float 1.000000e+00, %283 > %323 = fmul float %273, %306 > %324 = fmul float %274, %307 > %325 = fadd float %324, %323 > %326 = 
fmul float %275, %308 > %327 = fadd float %325, %326 > %328 = call float @llvm.AMDGPU.clamp.(float %327, float 0.000000e+00, float 1.000000e+00) > %329 = fsub float 1.000000e+00, %328 > %330 = fmul float %329, %329 > %331 = fmul float %330, %330 > %332 = fmul float %329, %331 > %333 = call float @llvm.fma.f32(float %322, float %332, float %283) > %334 = fmul float %321, %333 > %335 = fmul float %285, %295 > %336 = fmul float %286, %295 > %337 = fmul float %287, %295 > %338 = fmul float %282, %335 > %339 = fmul float %282, %336 > %340 = fmul float %282, %337 > %341 = fmul float %269, %273 > %342 = fmul float %270, %274 > %343 = fadd float %342, %341 > %344 = fmul float %271, %275 > %345 = fadd float %343, %344 > %346 = fadd float %241, %345 > %347 = fadd float %346, -1.000000e+00 > %348 = fcmp une float %241, 0.000000e+00 > br i1 %348, label %IF, label %ELSE > >IF: ; preds = %main_body > %349 = fdiv float 1.000000e+00, %241 > %350 = fmul float %347, %349 > br label %ENDIF > >ELSE: ; preds = %main_body > %351 = fcmp ogt float %347, 0.000000e+00 > %352 = select i1 %351, float 1.000000e+00, float %347 > %353 = fcmp oge float %352, 0.000000e+00 > %.op100 = fmul float %352, 0x4600000000000000 > %354 = select i1 %353, float %.op100, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp60.0 = phi float [ %350, %IF ], [ %354, %ELSE ] > %355 = call float @llvm.AMDGPU.clamp.(float %temp60.0, float 0.000000e+00, float 1.000000e+00) > %356 = call float @llvm.fma.f32(float %338, float %334, float %242) > %357 = call float @llvm.fma.f32(float %339, float %334, float %243) > %358 = call float @llvm.fma.f32(float %340, float %334, float %244) > %359 = fmul float %355, %356 > %360 = fmul float %355, %357 > %361 = fmul float %355, %358 > %362 = fcmp ogt float %284, 0.000000e+00 > br i1 %362, label %IF69, label %ENDIF68 > >IF69: ; preds = %ENDIF > %363 = fcmp olt float %259, 0x3FE0505060000000 > %364 = fadd float %242, %242 > %365 = fadd float %243, %243 > %366 = fadd float %244, %244 > %367 = fadd float %284, -5.000000e-01 > %368 = call float @llvm.AMDGPU.clamp.(float %367, float 0.000000e+00, float 1.000000e+00) > %369 = fmul float %368, %364 > %370 = fmul float %368, %365 > %371 = fmul float %368, %366 > %372 = fsub float 2.500000e-01, %345 > %373 = fsub float 1.000000e+00, %345 > %374 = call float @llvm.AMDGPU.clamp.(float %372, float 0.000000e+00, float 1.000000e+00) > %375 = call float @llvm.AMDGPU.clamp.(float %373, float 0.000000e+00, float 1.000000e+00) > %376 = call float @llvm.fma.f32(float %369, float %374, float %359) > %377 = call float @llvm.fma.f32(float %370, float %374, float %360) > %378 = call float @llvm.fma.f32(float %371, float %374, float %361) > %379 = call float @llvm.minnum.f32(float %284, float 5.000000e-01) > %380 = call float @llvm.maxnum.f32(float %240, float %239) > %381 = call float @llvm.maxnum.f32(float %380, float %238) > %382 = fcmp oeq float %381, 0.000000e+00 > %383 = fcmp oeq float %381, 0.000000e+00 > %384 = fcmp oeq float %381, 0.000000e+00 > %385 = fcmp ogt float %238, 0.000000e+00 > %386 = select i1 %385, float 1.000000e+00, float %238 > %387 = fcmp oge float %386, 0.000000e+00 > %388 = fcmp ogt float %239, 0.000000e+00 > %389 = select i1 %388, float 1.000000e+00, float %239 > %390 = fcmp oge float %389, 0.000000e+00 > %391 = fcmp ogt float %240, 0.000000e+00 > %392 = select i1 %391, float 1.000000e+00, float %240 > %393 = fcmp oge float %392, 0.000000e+00 > %.op101 = fmul float %386, 0x4600000000000000 > %394 = select i1 %387, float 
%.op101, float 0xC600000000000000 > %.op102 = fmul float %389, 0x4600000000000000 > %395 = select i1 %390, float %.op102, float 0xC600000000000000 > %.op103 = fmul float %392, 0x4600000000000000 > %396 = select i1 %393, float %.op103, float 0xC600000000000000 > %397 = fdiv float 1.000000e+00, %381 > %398 = fmul float %238, %397 > %399 = fmul float %239, %397 > %400 = fmul float %240, %397 > %401 = select i1 %382, float %394, float %398 > %402 = select i1 %383, float %395, float %399 > %403 = select i1 %384, float %396, float %400 > %404 = call float @llvm.AMDGPU.clamp.(float %401, float 0.000000e+00, float 1.000000e+00) > %405 = call float @llvm.AMDGPU.clamp.(float %402, float 0.000000e+00, float 1.000000e+00) > %406 = call float @llvm.AMDGPU.clamp.(float %403, float 0.000000e+00, float 1.000000e+00) > %407 = fmul float %404, %404 > %408 = fmul float %405, %405 > %409 = fmul float %406, %406 > %410 = call float @llvm.AMDGPU.clamp.(float %230, float 0.000000e+00, float 1.000000e+00) > %411 = call float @llvm.AMDGPU.clamp.(float %231, float 0.000000e+00, float 1.000000e+00) > %412 = call float @llvm.AMDGPU.clamp.(float %232, float 0.000000e+00, float 1.000000e+00) > %413 = fmul float %410, %407 > %414 = fmul float %411, %408 > %415 = fmul float %412, %409 > %416 = fmul float %413, 0x3FC3333340000000 > %417 = fmul float %414, 0x3FC3333340000000 > %418 = fmul float %415, 0x3FC3333340000000 > %. = select i1 %363, float %376, float %359 > %temp16.0 = select i1 %363, float %377, float %360 > %.92 = select i1 %363, float %378, float %361 > %temp16.2 = select i1 %363, float %284, float %379 > %.93 = select i1 %363, float %364, float %416 > %temp28.1 = select i1 %363, float %365, float %417 > %.94 = select i1 %363, float %366, float %418 > %419 = call float @llvm.minnum.f32(float %temp16.2, float 5.000000e-01) > %420 = fmul float %419, %.93 > %421 = fmul float %419, %temp28.1 > %422 = fmul float %419, %.94 > %423 = fmul float %375, %420 > %424 = fmul float %375, %421 > %425 = fmul float %375, %422 > %426 = fadd float %345, 2.500000e-01 > %427 = call float @llvm.AMDGPU.clamp.(float %426, float 0.000000e+00, float 1.000000e+00) > %428 = call float @llvm.fma.f32(float %423, float %427, float %.) 
> %429 = call float @llvm.fma.f32(float %424, float %427, float %temp16.0) > %430 = call float @llvm.fma.f32(float %425, float %427, float %.92) > br label %ENDIF68 > >ENDIF68: ; preds = %ENDIF, %IF69 > %temp10.0 = phi float [ %430, %IF69 ], [ %361, %ENDIF ] > %temp9.0 = phi float [ %429, %IF69 ], [ %360, %ENDIF ] > %temp8.0 = phi float [ %428, %IF69 ], [ %359, %ENDIF ] > %431 = fmul float %186, %temp8.0 > %432 = fmul float %186, %temp9.0 > %433 = fmul float %186, %temp10.0 > %434 = fadd float %296, 0x3FD54FDF40000000 > %435 = call float @llvm.AMDGPU.clamp.(float %434, float 0.000000e+00, float 1.000000e+00) > %436 = fmul float %435, %431 > %437 = fmul float %435, %432 > %438 = fmul float %435, %433 > %439 = bitcast float %5 to i32 > %440 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %439, 10 > %441 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %440, float %436, 11 > %442 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %441, float %437, 12 > %443 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %442, float %438, 13 > %444 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %443, float 1.000000e+00, 14 > %445 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %444, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %445 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > 
epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..18] >DCL TEMP[0..17], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {1, 256, 272, 0} >IMM[2] UINT32 {64, 80, 96, 112} >IMM[3] UINT32 {128, 160, 288, 0} >IMM[4] FLT32 { -1.0000, 4096.0000, 0.0040, 2.0040} >IMM[5] FLT32 { 0.1250, 0.5098, -0.5000, 0.5000} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.2500, 1.0000, 0.1500, 0.3330} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: ADD TEMP[3].x, -CONST[2][16].wwww, IMM[0].zzzz > 15: FMA TEMP[2].x, CONST[2][16].wwww, TEMP[2].xxxx, TEMP[3].xxxx > 16: MOV TEMP[3].xy, TEMP[0].xyyy > 17: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D > 18: MOV TEMP[4].z, TEMP[3].xxxx > 19: MUL TEMP[4].xy, TEMP[1].xyyy, TEMP[3].xxxx > 20: ADD TEMP[1].xyz, -TEMP[4].xyzz, CONST[2][17].xyzz > 21: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[2][0].xxxx > 22: DP3 TEMP[3].x, CONST[1][4].xyzz, TEMP[1].xyzz > 23: DP3 TEMP[5].x, CONST[1][5].xyzz, TEMP[1].xyzz > 24: MOV TEMP[3].y, TEMP[5].xxxx > 25: DP3 TEMP[1].x, CONST[1][6].xyzz, TEMP[1].xyzz > 26: MOV TEMP[3].z, TEMP[1].xxxx > 27: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[3].xyzz > 28: MIN TEMP[5].x, TEMP[1].xxxx, IMM[0].zzzz > 29: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].zzzz > 30: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 31: MOV TEMP[4].w, IMM[0].zzzz > 32: DP4 TEMP[6].x, CONST[2][7], TEMP[4] > 33: DP4 TEMP[7].x, CONST[2][8], TEMP[4] > 34: MOV TEMP[6].y, TEMP[7].xxxx > 35: DP4 TEMP[7].x, CONST[2][10], TEMP[4] > 36: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[0].xxxx > 37: SSG TEMP[9].xy, TEMP[6].xyyy > 38: MUL TEMP[9].xy, IMM[0].yyyy, TEMP[9].xyyy > 39: RCP TEMP[7].xy, TEMP[7].xxxx > 40: MUL TEMP[7].xy, TEMP[6].xyyy, TEMP[7].xyyy > 41: UCMP TEMP[7].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[7].xyyy > 42: MOV TEMP[7].xy, TEMP[7].xyyy > 43: MOV TEMP[7].w, IMM[0].xxxx > 44: TXL TEMP[7], TEMP[7], SAMP[2], 2D > 45: MUL TEMP[8].xyz, TEMP[7].xyzz, CONST[2][16].xyzz > 46: MOV TEMP[9].xy, TEMP[0].xyyy > 47: TEX TEMP[9], TEMP[9], SAMP[3], 2D > 48: MOV TEMP[6].xyz, TEMP[9] > 49: MUL TEMP[4].xyz, TEMP[8].xyzz, TEMP[9].xyzz > 50: DP3 TEMP[10].x, IN[2].xyzz, IN[2].xyzz > 51: RSQ TEMP[10].x, TEMP[10].xxxx > 52: MOV TEMP[11].xy, TEMP[0].xyyy > 53: TEX TEMP[11], TEMP[11], SAMP[4], 2D > 54: FMA TEMP[12].xyz, TEMP[11].xyzz, IMM[0].wwww, IMM[4].xxxx > 55: DP3 TEMP[13].x, TEMP[12].xyzz, TEMP[12].xyzz > 56: RSQ TEMP[14].x, TEMP[13].xxxx > 57: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx > 58: 
RSQ TEMP[1].x, TEMP[1].xxxx > 59: MUL TEMP[3].xyz, TEMP[1].xxxx, TEMP[3].xyzz > 60: MOV TEMP[1].xy, TEMP[0].xyyy > 61: TEX TEMP[1], TEMP[1], SAMP[5], 2D > 62: MOV TEMP[13], TEMP[1].wxyz > 63: MUL TEMP[7].xyz, TEMP[7].wwww, CONST[2][18].xyzz > 64: MUL TEMP[14].x, TEMP[1].xxxx, TEMP[1].xxxx > 65: MOV TEMP[15].xy, TEMP[0].xyyy > 66: MOV TEMP[15].w, IMM[0].xxxx > 67: TXL TEMP[15].xy, TEMP[15], SAMP[6], 2D > 68: FMA TEMP[10].xyz, IN[2].xyzz, TEMP[10].xxxx, TEMP[3].xyzz > 69: DP3 TEMP[16].x, TEMP[10].xyzz, TEMP[10].xyzz > 70: RSQ TEMP[16].x, TEMP[16].xxxx > 71: MUL TEMP[10].xyz, TEMP[16].xxxx, TEMP[10].xyzz > 72: DP3 TEMP[16].x, TEMP[12].xyzz, TEMP[10].xyzz > 73: MOV_SAT TEMP[16].x, TEMP[16].xxxx > 74: FMA TEMP[14].xy, TEMP[14].xxxx, IMM[4].yyyy, IMM[4].zwww > 75: MUL TEMP[17].x, TEMP[14].yyyy, IMM[5].xxxx > 76: LG2 TEMP[16].x, TEMP[16].xxxx > 77: MUL TEMP[14].x, TEMP[16].xxxx, TEMP[14].xxxx > 78: EX2 TEMP[14].x, TEMP[14].xxxx > 79: MUL TEMP[14].x, TEMP[17].xxxx, TEMP[14].xxxx > 80: ADD TEMP[16].x, -TEMP[1].zzzz, IMM[0].zzzz > 81: DP3 TEMP[10].x, TEMP[3].xyzz, TEMP[10].xyzz > 82: MOV_SAT TEMP[10].x, TEMP[10].xxxx > 83: ADD TEMP[10].x, -TEMP[10].xxxx, IMM[0].zzzz > 84: MUL TEMP[17].x, TEMP[10].xxxx, TEMP[10].xxxx > 85: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[17].xxxx > 86: MOV TEMP[7].w, TEMP[17].xxxx > 87: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[17].xxxx > 88: FMA TEMP[10].x, TEMP[16].xxxx, TEMP[10].xxxx, TEMP[1].zzzz > 89: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[10].xxxx > 90: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[15].xxxx > 91: MUL TEMP[7].xyz, TEMP[1].yyyy, TEMP[7].xyzz > 92: DP3 TEMP[12].x, TEMP[12].xyzz, TEMP[3].xyzz > 93: MOV TEMP[0].x, TEMP[12].xxxx > 94: ADD TEMP[14].x, TEMP[9].wwww, TEMP[12].xxxx > 95: ADD TEMP[14].x, TEMP[14].xxxx, IMM[4].xxxx > 96: FSNE TEMP[16].x, TEMP[9].wwww, IMM[0].xxxx > 97: UIF TEMP[16].xxxx :0 > 98: RCP TEMP[16].x, TEMP[9].wwww > 99: MUL TEMP[16].x, TEMP[14].xxxx, TEMP[16].xxxx >100: ELSE :0 >101: SSG TEMP[14].x, TEMP[14].xxxx >102: MUL TEMP[16].x, IMM[0].yyyy, TEMP[14].xxxx >103: ENDIF >104: MOV_SAT TEMP[14].x, TEMP[16].xxxx >105: FMA TEMP[10].xyz, TEMP[7].xyzz, TEMP[10].xxxx, TEMP[4].xyzz >106: MUL TEMP[3].xyz, TEMP[14].xxxx, TEMP[10].xyzz >107: FSLT TEMP[10].x, IMM[0].xxxx, TEMP[1].wwww >108: AND TEMP[10].x, TEMP[10].xxxx, IMM[6].xxxx >109: INEG TEMP[10].x, TEMP[10].xxxx >110: USNE TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww >111: UIF TEMP[10].xxxx :0 >112: FSLT TEMP[10].x, TEMP[11].wwww, IMM[5].yyyy >113: AND TEMP[10].x, TEMP[10].xxxx, IMM[6].xxxx >114: INEG TEMP[10].x, TEMP[10].xxxx >115: ADD TEMP[11].xyz, TEMP[4].xyzz, TEMP[4].xyzz >116: MOV TEMP[13].yzw, TEMP[11].yxyz >117: ADD TEMP[14].x, TEMP[1].wwww, IMM[5].zzzz >118: MOV_SAT TEMP[14].x, TEMP[14].xxxx >119: MUL TEMP[4].xyz, TEMP[14].xxxx, TEMP[11].xyzz >120: ADD TEMP[11].xy, -TEMP[12].xxxx, IMM[7].xyyy >121: MOV_SAT TEMP[11].xy, TEMP[11].xyyy >122: FMA TEMP[4].xyz, TEMP[4].xyzz, TEMP[11].xxxx, TEMP[3].xyzz >123: MIN TEMP[7].x, TEMP[1].wwww, IMM[5].wwww >124: MAX TEMP[1].x, TEMP[9].zzzz, TEMP[9].yyyy >125: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx >126: FSEQ TEMP[14].xyz, TEMP[1].xxxx, IMM[0].xxxx >127: SSG TEMP[16].xyz, TEMP[9].xyzz >128: MUL TEMP[16].xyz, IMM[0].yyyy, TEMP[16].xyzz >129: RCP TEMP[1].xyz, TEMP[1].xxxx >130: MUL TEMP[1].xyz, TEMP[9].xyzz, TEMP[1].xyzz >131: UCMP TEMP[1].xyz, TEMP[14].xyzz, TEMP[16].xyzz, TEMP[1].xyzz >132: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz >133: MUL TEMP[6].xyz, TEMP[1].xyzz, TEMP[1].xyzz >134: MOV_SAT TEMP[1].xyz, TEMP[8].xyzz >135: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[6].xyzz >136: MUL 
TEMP[1].xyz, TEMP[1].xyzz, IMM[7].zzzz >137: MOV TEMP[7].yzw, TEMP[1].yxyz >138: USNE TEMP[1].x, TEMP[10].xxxx, IMM[1].wwww >139: UIF TEMP[1].xxxx :0 >140: MOV TEMP[1].x, TEMP[4].xxxx >141: ELSE :0 >142: MOV TEMP[1].x, TEMP[3].xxxx >143: ENDIF >144: MOV TEMP[1].x, TEMP[1].xxxx >145: USNE TEMP[6].x, TEMP[10].xxxx, IMM[1].wwww >146: UIF TEMP[6].xxxx :0 >147: MOV TEMP[6].x, TEMP[4].yyyy >148: ELSE :0 >149: MOV TEMP[6].x, TEMP[3].yyyy >150: ENDIF >151: MOV TEMP[1].y, TEMP[6].xxxx >152: USNE TEMP[6].x, TEMP[10].xxxx, IMM[1].wwww >153: UIF TEMP[6].xxxx :0 >154: MOV TEMP[6].x, TEMP[4].zzzz >155: ELSE :0 >156: MOV TEMP[6].x, TEMP[3].zzzz >157: ENDIF >158: MOV TEMP[1].z, TEMP[6].xxxx >159: USNE TEMP[6].x, TEMP[10].xxxx, IMM[1].wwww >160: UIF TEMP[6].xxxx :0 >161: MOV TEMP[6].x, TEMP[13].xxxx >162: ELSE :0 >163: MOV TEMP[6].x, TEMP[7].xxxx >164: ENDIF >165: MOV TEMP[6].x, TEMP[6].xxxx >166: USNE TEMP[8].x, TEMP[10].xxxx, IMM[1].wwww >167: UIF TEMP[8].xxxx :0 >168: MOV TEMP[8].x, TEMP[13].yyyy >169: ELSE :0 >170: MOV TEMP[8].x, TEMP[7].yyyy >171: ENDIF >172: MOV TEMP[6].y, TEMP[8].xxxx >173: USNE TEMP[8].x, TEMP[10].xxxx, IMM[1].wwww >174: UIF TEMP[8].xxxx :0 >175: MOV TEMP[8].x, TEMP[13].zzzz >176: ELSE :0 >177: MOV TEMP[8].x, TEMP[7].zzzz >178: ENDIF >179: MOV TEMP[6].z, TEMP[8].xxxx >180: USNE TEMP[8].x, TEMP[10].xxxx, IMM[1].wwww >181: UIF TEMP[8].xxxx :0 >182: MOV TEMP[8].x, TEMP[13].wwww >183: ELSE :0 >184: MOV TEMP[8].x, TEMP[7].wwww >185: ENDIF >186: MOV TEMP[6].w, TEMP[8].xxxx >187: MIN TEMP[7].x, TEMP[6].xxxx, IMM[5].wwww >188: MUL TEMP[4].xyz, TEMP[7].xxxx, TEMP[6].yzww >189: MUL TEMP[4].xyz, TEMP[11].yyyy, TEMP[4].xyzz >190: ADD TEMP[6].x, TEMP[12].xxxx, IMM[7].xxxx >191: MOV_SAT TEMP[0].x, TEMP[6].xxxx >192: FMA TEMP[3].xyz, TEMP[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >193: ENDIF >194: MUL TEMP[0].xyz, TEMP[5].xxxx, TEMP[3].xyzz >195: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx >196: ADD TEMP[1].x, TEMP[15].yyyy, IMM[7].wwww >197: MOV_SAT TEMP[1].x, TEMP[1].xxxx >198: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >199: MOV TEMP[0].w, IMM[0].zzzz >200: MOV OUT[0], TEMP[0] >201: END >radeonsi: Compiling shader 79 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], 
[16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 124) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 140) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %52 = call float @llvm.SI.load.const(<16 x i8> %35, i32 268) > %53 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %54 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %55 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %56 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %57 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %58 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 3 > %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 > %70 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %71 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %70, i64 0, i64 7 > %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 > %73 = extractelement <8 x i32> %69, i32 7 > %74 = extractelement <4 x i32> %72, i32 0 > %75 = and i32 %74, %73 > %76 = insertelement <4 x i32> %72, i32 %75, i32 0 > %77 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %78 = load <8 x i32>, <8 x i32> addrspace(2)* %77, align 32, !tbaa !0 > %79 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %79, i64 0, i64 11 > %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 > %82 = extractelement <8 x i32> %78, i32 7 > %83 = extractelement <4 x i32> %81, i32 0 > %84 = and i32 %83, %82 > %85 = insertelement <4 x i32> %81, i32 %84, i32 0 > %86 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 > %88 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %89 = getelementptr [0 x <4 x i32>], [0 
x <4 x i32>] addrspace(2)* %88, i64 0, i64 15 > %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 > %91 = extractelement <8 x i32> %87, i32 7 > %92 = extractelement <4 x i32> %90, i32 0 > %93 = and i32 %92, %91 > %94 = insertelement <4 x i32> %90, i32 %93, i32 0 > %95 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 > %97 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %98 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %97, i64 0, i64 19 > %99 = load <4 x i32>, <4 x i32> addrspace(2)* %98, align 16, !tbaa !0 > %100 = extractelement <8 x i32> %96, i32 7 > %101 = extractelement <4 x i32> %99, i32 0 > %102 = and i32 %101, %100 > %103 = insertelement <4 x i32> %99, i32 %102, i32 0 > %104 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %105 = load <8 x i32>, <8 x i32> addrspace(2)* %104, align 32, !tbaa !0 > %106 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %107 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %106, i64 0, i64 23 > %108 = load <4 x i32>, <4 x i32> addrspace(2)* %107, align 16, !tbaa !0 > %109 = extractelement <8 x i32> %105, i32 7 > %110 = extractelement <4 x i32> %108, i32 0 > %111 = and i32 %110, %109 > %112 = insertelement <4 x i32> %108, i32 %111, i32 0 > %113 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %114 = load <8 x i32>, <8 x i32> addrspace(2)* %113, align 32, !tbaa !0 > %115 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %116 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %115, i64 0, i64 27 > %117 = load <4 x i32>, <4 x i32> addrspace(2)* %116, align 16, !tbaa !0 > %118 = extractelement <8 x i32> %114, i32 7 > %119 = extractelement <4 x i32> %117, i32 0 > %120 = and i32 %119, %118 > %121 = insertelement <4 x i32> %117, i32 %120, i32 0 > %122 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %123 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %124 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %125 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %126 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %127 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %128 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %129 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %130 = fcmp oeq float %124, 0.000000e+00 > %131 = fcmp oeq float %124, 0.000000e+00 > %132 = fcmp ogt float %122, 0.000000e+00 > %133 = select i1 %132, float 1.000000e+00, float %122 > %134 = fcmp oge float %133, 0.000000e+00 > %135 = fcmp ogt float %123, 0.000000e+00 > %136 = select i1 %135, float 1.000000e+00, float %123 > %137 = fcmp oge float %136, 0.000000e+00 > %.op = fmul float %133, 0x4600000000000000 > %138 = select i1 %134, float %.op, float 0xC600000000000000 > %.op99 = fmul float %136, 0x4600000000000000 > %139 = select i1 %137, float %.op99, float 0xC600000000000000 > %140 = fdiv float 1.000000e+00, %124 > %141 = fmul float %122, %140 > %142 = fmul float %123, %140 > %143 = select i1 %130, float %138, float %141 > %144 = select i1 %131, float %139, float %142 > %145 = fcmp oeq float %124, 0.000000e+00 > %146 = fcmp oeq float %124, 0.000000e+00 > %147 = fcmp ogt float %125, 0.000000e+00 > %148 = 
select i1 %147, float 1.000000e+00, float %125 > %149 = fcmp oge float %148, 0.000000e+00 > %150 = fcmp ogt float %126, 0.000000e+00 > %151 = select i1 %150, float 1.000000e+00, float %126 > %152 = fcmp oge float %151, 0.000000e+00 > %.op100 = fmul float %148, 0x4600000000000000 > %153 = select i1 %149, float %.op100, float 0xC600000000000000 > %.op101 = fmul float %151, 0x4600000000000000 > %154 = select i1 %152, float %.op101, float 0xC600000000000000 > %155 = fdiv float 1.000000e+00, %124 > %156 = fmul float %125, %155 > %157 = fmul float %126, %155 > %158 = select i1 %145, float %153, float %156 > %159 = select i1 %146, float %154, float %157 > %160 = bitcast float %143 to i32 > %161 = bitcast float %144 to i32 > %162 = insertelement <2 x i32> undef, i32 %160, i32 0 > %163 = insertelement <2 x i32> %162, i32 %161, i32 1 > %164 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %163, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %165 = extractelement <4 x float> %164, i32 0 > %166 = fsub float 1.000000e+00, %52 > %167 = call float @llvm.fma.f32(float %52, float %165, float %166) > %168 = bitcast float %143 to i32 > %169 = bitcast float %144 to i32 > %170 = insertelement <2 x i32> undef, i32 %168, i32 0 > %171 = insertelement <2 x i32> %170, i32 %169, i32 1 > %172 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %171, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %173 = extractelement <4 x float> %172, i32 0 > %174 = fmul float %158, %173 > %175 = fmul float %159, %173 > %176 = fsub float %53, %174 > %177 = fsub float %54, %175 > %178 = fsub float %55, %173 > %179 = fmul float %176, %36 > %180 = fmul float %177, %36 > %181 = fmul float %178, %36 > %182 = fmul float %25, %179 > %183 = fmul float %26, %180 > %184 = fadd float %183, %182 > %185 = fmul float %27, %181 > %186 = fadd float %184, %185 > %187 = fmul float %28, %179 > %188 = fmul float %29, %180 > %189 = fadd float %188, %187 > %190 = fmul float %30, %181 > %191 = fadd float %189, %190 > %192 = fmul float %31, %179 > %193 = fmul float %32, %180 > %194 = fadd float %193, %192 > %195 = fmul float %33, %181 > %196 = fadd float %194, %195 > %197 = fmul float %186, %186 > %198 = fmul float %191, %191 > %199 = fadd float %198, %197 > %200 = fmul float %196, %196 > %201 = fadd float %199, %200 > %202 = call float @llvm.minnum.f32(float %201, float 1.000000e+00) > %203 = fsub float 1.000000e+00, %202 > %204 = fmul float %203, %203 > %205 = fmul float %37, %174 > %206 = fmul float %38, %175 > %207 = fadd float %205, %206 > %208 = fmul float %39, %173 > %209 = fadd float %207, %208 > %210 = fadd float %209, %40 > %211 = fmul float %41, %174 > %212 = fmul float %42, %175 > %213 = fadd float %211, %212 > %214 = fmul float %43, %173 > %215 = fadd float %213, %214 > %216 = fadd float %215, %44 > %217 = fmul float %45, %174 > %218 = fmul float %46, %175 > %219 = fadd float %217, %218 > %220 = fmul float %47, %173 > %221 = fadd float %219, %220 > %222 = fadd float %221, %48 > %223 = fcmp oeq float %222, 0.000000e+00 > %224 = fcmp oeq float %222, 0.000000e+00 > %225 = fcmp ogt float %210, 0.000000e+00 > %226 = select i1 %225, float 1.000000e+00, float %210 > %227 = fcmp oge float %226, 0.000000e+00 > %228 = fcmp ogt float %216, 0.000000e+00 > %229 = select i1 %228, float 1.000000e+00, float %216 > %230 = fcmp oge float %229, 0.000000e+00 > %.op102 = fmul float %226, 0x4600000000000000 > %231 = select i1 %227, float %.op102, float 0xC600000000000000 
> %.op103 = fmul float %229, 0x4600000000000000 > %232 = select i1 %230, float %.op103, float 0xC600000000000000 > %233 = fdiv float 1.000000e+00, %222 > %234 = fmul float %210, %233 > %235 = fmul float %216, %233 > %236 = select i1 %223, float %231, float %234 > %237 = select i1 %224, float %232, float %235 > %238 = bitcast float %236 to i32 > %239 = bitcast float %237 to i32 > %240 = insertelement <4 x i32> undef, i32 %238, i32 0 > %241 = insertelement <4 x i32> %240, i32 %239, i32 1 > %242 = insertelement <4 x i32> %241, i32 0, i32 2 > %243 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %242, <8 x i32> %78, <4 x i32> %85, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %244 = extractelement <4 x float> %243, i32 0 > %245 = extractelement <4 x float> %243, i32 1 > %246 = extractelement <4 x float> %243, i32 2 > %247 = extractelement <4 x float> %243, i32 3 > %248 = fmul float %244, %49 > %249 = fmul float %245, %50 > %250 = fmul float %246, %51 > %251 = bitcast float %143 to i32 > %252 = bitcast float %144 to i32 > %253 = insertelement <2 x i32> undef, i32 %251, i32 0 > %254 = insertelement <2 x i32> %253, i32 %252, i32 1 > %255 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %254, <8 x i32> %87, <4 x i32> %94, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %256 = extractelement <4 x float> %255, i32 0 > %257 = extractelement <4 x float> %255, i32 1 > %258 = extractelement <4 x float> %255, i32 2 > %259 = extractelement <4 x float> %255, i32 3 > %260 = fmul float %248, %256 > %261 = fmul float %249, %257 > %262 = fmul float %250, %258 > %263 = fmul float %127, %127 > %264 = fmul float %128, %128 > %265 = fadd float %264, %263 > %266 = fmul float %129, %129 > %267 = fadd float %265, %266 > %268 = call float @llvm.AMDGPU.rsq.clamped.f32(float %267) > %269 = bitcast float %143 to i32 > %270 = bitcast float %144 to i32 > %271 = insertelement <2 x i32> undef, i32 %269, i32 0 > %272 = insertelement <2 x i32> %271, i32 %270, i32 1 > %273 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %272, <8 x i32> %96, <4 x i32> %103, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %274 = extractelement <4 x float> %273, i32 0 > %275 = extractelement <4 x float> %273, i32 1 > %276 = extractelement <4 x float> %273, i32 2 > %277 = extractelement <4 x float> %273, i32 3 > %278 = call float @llvm.fma.f32(float %274, float 2.000000e+00, float -1.000000e+00) > %279 = call float @llvm.fma.f32(float %275, float 2.000000e+00, float -1.000000e+00) > %280 = call float @llvm.fma.f32(float %276, float 2.000000e+00, float -1.000000e+00) > %281 = fmul float %278, %278 > %282 = fmul float %279, %279 > %283 = fadd float %282, %281 > %284 = fmul float %280, %280 > %285 = fadd float %283, %284 > %286 = call float @llvm.AMDGPU.rsq.clamped.f32(float %285) > %287 = fmul float %278, %286 > %288 = fmul float %279, %286 > %289 = fmul float %280, %286 > %290 = call float @llvm.AMDGPU.rsq.clamped.f32(float %201) > %291 = fmul float %290, %186 > %292 = fmul float %290, %191 > %293 = fmul float %290, %196 > %294 = bitcast float %143 to i32 > %295 = bitcast float %144 to i32 > %296 = insertelement <2 x i32> undef, i32 %294, i32 0 > %297 = insertelement <2 x i32> %296, i32 %295, i32 1 > %298 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %297, <8 x i32> %105, <4 x i32> %112, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %299 = extractelement <4 x float> %298, i32 0 > %300 = extractelement <4 x float> %298, i32 1 > %301 = extractelement <4 x float> 
%298, i32 2 > %302 = extractelement <4 x float> %298, i32 3 > %303 = fmul float %247, %56 > %304 = fmul float %247, %57 > %305 = fmul float %247, %58 > %306 = fmul float %299, %299 > %307 = bitcast float %143 to i32 > %308 = bitcast float %144 to i32 > %309 = insertelement <4 x i32> undef, i32 %307, i32 0 > %310 = insertelement <4 x i32> %309, i32 %308, i32 1 > %311 = insertelement <4 x i32> %310, i32 0, i32 2 > %312 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %311, <8 x i32> %114, <4 x i32> %121, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %313 = extractelement <4 x float> %312, i32 0 > %314 = extractelement <4 x float> %312, i32 1 > %315 = call float @llvm.fma.f32(float %127, float %268, float %291) > %316 = call float @llvm.fma.f32(float %128, float %268, float %292) > %317 = call float @llvm.fma.f32(float %129, float %268, float %293) > %318 = fmul float %315, %315 > %319 = fmul float %316, %316 > %320 = fadd float %319, %318 > %321 = fmul float %317, %317 > %322 = fadd float %320, %321 > %323 = call float @llvm.AMDGPU.rsq.clamped.f32(float %322) > %324 = fmul float %323, %315 > %325 = fmul float %323, %316 > %326 = fmul float %323, %317 > %327 = fmul float %287, %324 > %328 = fmul float %288, %325 > %329 = fadd float %328, %327 > %330 = fmul float %289, %326 > %331 = fadd float %329, %330 > %332 = call float @llvm.AMDGPU.clamp.(float %331, float 0.000000e+00, float 1.000000e+00) > %333 = call float @llvm.fma.f32(float %306, float 4.096000e+03, float 0x3F70624DE0000000) > %334 = call float @llvm.fma.f32(float %306, float 4.096000e+03, float 0x4000083120000000) > %335 = fmul float %334, 1.250000e-01 > %336 = call float @llvm.log2.f32(float %332) > %337 = fmul float %336, %333 > %338 = call float @llvm.exp2.f32(float %337) > %339 = fmul float %335, %338 > %340 = fsub float 1.000000e+00, %301 > %341 = fmul float %291, %324 > %342 = fmul float %292, %325 > %343 = fadd float %342, %341 > %344 = fmul float %293, %326 > %345 = fadd float %343, %344 > %346 = call float @llvm.AMDGPU.clamp.(float %345, float 0.000000e+00, float 1.000000e+00) > %347 = fsub float 1.000000e+00, %346 > %348 = fmul float %347, %347 > %349 = fmul float %348, %348 > %350 = fmul float %347, %349 > %351 = call float @llvm.fma.f32(float %340, float %350, float %301) > %352 = fmul float %339, %351 > %353 = fmul float %303, %313 > %354 = fmul float %304, %313 > %355 = fmul float %305, %313 > %356 = fmul float %300, %353 > %357 = fmul float %300, %354 > %358 = fmul float %300, %355 > %359 = fmul float %287, %291 > %360 = fmul float %288, %292 > %361 = fadd float %360, %359 > %362 = fmul float %289, %293 > %363 = fadd float %361, %362 > %364 = fadd float %259, %363 > %365 = fadd float %364, -1.000000e+00 > %366 = fcmp une float %259, 0.000000e+00 > br i1 %366, label %IF, label %ELSE > >IF: ; preds = %main_body > %367 = fdiv float 1.000000e+00, %259 > %368 = fmul float %365, %367 > br label %ENDIF > >ELSE: ; preds = %main_body > %369 = fcmp ogt float %365, 0.000000e+00 > %370 = select i1 %369, float 1.000000e+00, float %365 > %371 = fcmp oge float %370, 0.000000e+00 > %.op104 = fmul float %370, 0x4600000000000000 > %372 = select i1 %371, float %.op104, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp64.0 = phi float [ %368, %IF ], [ %372, %ELSE ] > %373 = call float @llvm.AMDGPU.clamp.(float %temp64.0, float 0.000000e+00, float 1.000000e+00) > %374 = call float @llvm.fma.f32(float %356, float %352, float %260) > %375 = call float @llvm.fma.f32(float %357, float 
%352, float %261) > %376 = call float @llvm.fma.f32(float %358, float %352, float %262) > %377 = fmul float %373, %374 > %378 = fmul float %373, %375 > %379 = fmul float %373, %376 > %380 = fcmp ogt float %302, 0.000000e+00 > br i1 %380, label %IF73, label %ENDIF72 > >IF73: ; preds = %ENDIF > %381 = fcmp olt float %277, 0x3FE0505060000000 > %382 = fadd float %260, %260 > %383 = fadd float %261, %261 > %384 = fadd float %262, %262 > %385 = fadd float %302, -5.000000e-01 > %386 = call float @llvm.AMDGPU.clamp.(float %385, float 0.000000e+00, float 1.000000e+00) > %387 = fmul float %386, %382 > %388 = fmul float %386, %383 > %389 = fmul float %386, %384 > %390 = fsub float 2.500000e-01, %363 > %391 = fsub float 1.000000e+00, %363 > %392 = call float @llvm.AMDGPU.clamp.(float %390, float 0.000000e+00, float 1.000000e+00) > %393 = call float @llvm.AMDGPU.clamp.(float %391, float 0.000000e+00, float 1.000000e+00) > %394 = call float @llvm.fma.f32(float %387, float %392, float %377) > %395 = call float @llvm.fma.f32(float %388, float %392, float %378) > %396 = call float @llvm.fma.f32(float %389, float %392, float %379) > %397 = call float @llvm.minnum.f32(float %302, float 5.000000e-01) > %398 = call float @llvm.maxnum.f32(float %258, float %257) > %399 = call float @llvm.maxnum.f32(float %398, float %256) > %400 = fcmp oeq float %399, 0.000000e+00 > %401 = fcmp oeq float %399, 0.000000e+00 > %402 = fcmp oeq float %399, 0.000000e+00 > %403 = fcmp ogt float %256, 0.000000e+00 > %404 = select i1 %403, float 1.000000e+00, float %256 > %405 = fcmp oge float %404, 0.000000e+00 > %406 = fcmp ogt float %257, 0.000000e+00 > %407 = select i1 %406, float 1.000000e+00, float %257 > %408 = fcmp oge float %407, 0.000000e+00 > %409 = fcmp ogt float %258, 0.000000e+00 > %410 = select i1 %409, float 1.000000e+00, float %258 > %411 = fcmp oge float %410, 0.000000e+00 > %.op105 = fmul float %404, 0x4600000000000000 > %412 = select i1 %405, float %.op105, float 0xC600000000000000 > %.op106 = fmul float %407, 0x4600000000000000 > %413 = select i1 %408, float %.op106, float 0xC600000000000000 > %.op107 = fmul float %410, 0x4600000000000000 > %414 = select i1 %411, float %.op107, float 0xC600000000000000 > %415 = fdiv float 1.000000e+00, %399 > %416 = fmul float %256, %415 > %417 = fmul float %257, %415 > %418 = fmul float %258, %415 > %419 = select i1 %400, float %412, float %416 > %420 = select i1 %401, float %413, float %417 > %421 = select i1 %402, float %414, float %418 > %422 = call float @llvm.AMDGPU.clamp.(float %419, float 0.000000e+00, float 1.000000e+00) > %423 = call float @llvm.AMDGPU.clamp.(float %420, float 0.000000e+00, float 1.000000e+00) > %424 = call float @llvm.AMDGPU.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) > %425 = fmul float %422, %422 > %426 = fmul float %423, %423 > %427 = fmul float %424, %424 > %428 = call float @llvm.AMDGPU.clamp.(float %248, float 0.000000e+00, float 1.000000e+00) > %429 = call float @llvm.AMDGPU.clamp.(float %249, float 0.000000e+00, float 1.000000e+00) > %430 = call float @llvm.AMDGPU.clamp.(float %250, float 0.000000e+00, float 1.000000e+00) > %431 = fmul float %428, %425 > %432 = fmul float %429, %426 > %433 = fmul float %430, %427 > %434 = fmul float %431, 0x3FC3333340000000 > %435 = fmul float %432, 0x3FC3333340000000 > %436 = fmul float %433, 0x3FC3333340000000 > %. 
= select i1 %381, float %394, float %377 > %temp24.0 = select i1 %381, float %395, float %378 > %.96 = select i1 %381, float %396, float %379 > %temp24.2 = select i1 %381, float %302, float %397 > %.97 = select i1 %381, float %382, float %434 > %temp32.1 = select i1 %381, float %383, float %435 > %.98 = select i1 %381, float %384, float %436 > %437 = call float @llvm.minnum.f32(float %temp24.2, float 5.000000e-01) > %438 = fmul float %437, %.97 > %439 = fmul float %437, %temp32.1 > %440 = fmul float %437, %.98 > %441 = fmul float %393, %438 > %442 = fmul float %393, %439 > %443 = fmul float %393, %440 > %444 = fadd float %363, 2.500000e-01 > %445 = call float @llvm.AMDGPU.clamp.(float %444, float 0.000000e+00, float 1.000000e+00) > %446 = call float @llvm.fma.f32(float %441, float %445, float %.) > %447 = call float @llvm.fma.f32(float %442, float %445, float %temp24.0) > %448 = call float @llvm.fma.f32(float %443, float %445, float %.96) > br label %ENDIF72 > >ENDIF72: ; preds = %ENDIF, %IF73 > %temp14.0 = phi float [ %448, %IF73 ], [ %379, %ENDIF ] > %temp13.0 = phi float [ %447, %IF73 ], [ %378, %ENDIF ] > %temp12.0 = phi float [ %446, %IF73 ], [ %377, %ENDIF ] > %449 = fmul float %204, %temp12.0 > %450 = fmul float %204, %temp13.0 > %451 = fmul float %204, %temp14.0 > %452 = fmul float %449, %167 > %453 = fmul float %450, %167 > %454 = fmul float %451, %167 > %455 = fadd float %314, 0x3FD54FDF40000000 > %456 = call float @llvm.AMDGPU.clamp.(float %455, float 0.000000e+00, float 1.000000e+00) > %457 = fmul float %456, %452 > %458 = fmul float %456, %453 > %459 = fmul float %456, %454 > %460 = bitcast float %5 to i32 > %461 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %460, 10 > %462 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %461, float %457, 11 > %463 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %462, float %458, 12 > %464 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %463, float %459, 13 > %465 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %464, float 1.000000e+00, 14 > %466 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %465, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %466 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, 
float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], SHADOW2D_ARRAY, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..40] >DCL TEMP[0..16], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.5000, 1.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {1065353216, 1, 640, 384} >IMM[3] UINT32 {400, 416, 432, 368} >IMM[4] FLT32 { 0.5000, -0.5000, 0.2500, 2.0000} >IMM[5] UINT32 {464, 256, 272, 0} >IMM[6] UINT32 {64, 80, 96, 112} >IMM[7] UINT32 {128, 160, 288, 0} >IMM[8] FLT32 { -1.0000, 4096.0000, 0.0040, 2.0040} >IMM[9] FLT32 { 0.1250, 0.5098, 0.1500, 0.3330} >IMM[10] FLT32 { 0.2500, 1.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: MOV TEMP[1].xy, TEMP[0].xyyy > 17: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 18: FSGE TEMP[4].x, TEMP[1].wwww, IMM[0].zzzz > 19: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx > 20: INEG TEMP[4].x, TEMP[4].xxxx > 21: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx > 22: FMA TEMP[4].xyz, TEMP[4].xxxx, CONST[2][40].xyzz, TEMP[3].xyzz > 23: MOV TEMP[4].w, IMM[0].wwww > 24: DP4 TEMP[5].x, CONST[2][24], TEMP[4] > 25: DP4 TEMP[6].x, CONST[2][25], TEMP[4] > 26: MOV TEMP[5].y, TEMP[6].xxxx > 27: DP4 TEMP[6].x, CONST[2][26], TEMP[4] > 28: MOV TEMP[5].z, TEMP[6].xxxx > 29: DP4 TEMP[6].x, CONST[2][27], TEMP[4] > 30: FSEQ TEMP[7].xyz, TEMP[6].xxxx, IMM[0].xxxx > 31: SSG TEMP[8].xyz, TEMP[5].xyzz > 32: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 33: RCP TEMP[6].xyz, TEMP[6].xxxx > 34: MUL TEMP[6].xyz, TEMP[5].xyzz, TEMP[6].xyzz > 35: UCMP TEMP[6].xyz, TEMP[7].xyzz, 
TEMP[8].xyzz, TEMP[6].xyzz > 36: FMA TEMP[7], TEMP[6].xyxy, IMM[4].xyxy, IMM[0].zzzz > 37: ADD TEMP[6].x, -TEMP[6].zzzz, IMM[0].wwww > 38: FMA TEMP[8], CONST[2][23].zwzw, IMM[4].xyyy, TEMP[7].zwzw > 39: MOV TEMP[4].xy, TEMP[8].xyxw > 40: MOV TEMP[8].xy, TEMP[8].zwzz > 41: MOV TEMP[8].z, CONST[2][29].xxxx > 42: MOV TEMP[9].xyz, TEMP[8].xyzz > 43: MOV TEMP[9].w, TEMP[6].xxxx > 44: TEX TEMP[9].x, TEMP[9], SAMP[2], SHADOW2D_ARRAY > 45: MOV TEMP[4].z, CONST[2][29].xxxx > 46: MOV TEMP[10].xyz, TEMP[4].xyzz > 47: MOV TEMP[10].w, TEMP[6].xxxx > 48: TEX TEMP[10].x, TEMP[10], SAMP[2], SHADOW2D_ARRAY > 49: FMA TEMP[7], CONST[2][23].zwzw, IMM[4].xxyx, TEMP[7].zwxy > 50: MOV TEMP[5].xy, TEMP[7].xyxw > 51: MOV TEMP[8].xy, TEMP[7].zwzz > 52: MOV TEMP[8].z, CONST[2][29].xxxx > 53: MOV TEMP[7].xyz, TEMP[8].xyzz > 54: MOV TEMP[7].w, TEMP[6].xxxx > 55: TEX TEMP[7].x, TEMP[7], SAMP[2], SHADOW2D_ARRAY > 56: MOV TEMP[5].z, CONST[2][29].xxxx > 57: MOV TEMP[11].xyz, TEMP[5].xyzz > 58: MOV TEMP[11].w, TEMP[6].xxxx > 59: TEX TEMP[6].x, TEMP[11], SAMP[2], SHADOW2D_ARRAY > 60: ADD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx > 61: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx > 62: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx > 63: FMA TEMP[6].x, -TEMP[6].xxxx, IMM[4].zzzz, IMM[0].wwww > 64: ADD TEMP[7].x, -CONST[2][16].wwww, IMM[0].wwww > 65: FMA TEMP[6].x, CONST[2][16].wwww, TEMP[6].xxxx, TEMP[7].xxxx > 66: ADD TEMP[4].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 67: MUL TEMP[4].xyz, TEMP[4].xyzz, CONST[2][0].xxxx > 68: DP3 TEMP[5].x, CONST[1][4].xyzz, TEMP[4].xyzz > 69: DP3 TEMP[7].x, CONST[1][5].xyzz, TEMP[4].xyzz > 70: MOV TEMP[5].y, TEMP[7].xxxx > 71: DP3 TEMP[7].x, CONST[1][6].xyzz, TEMP[4].xyzz > 72: MOV TEMP[5].z, TEMP[7].xxxx > 73: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz > 74: MIN TEMP[9].x, TEMP[7].xxxx, IMM[0].wwww > 75: ADD TEMP[4].x, -TEMP[9].xxxx, IMM[0].wwww > 76: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx > 77: MOV TEMP[3].w, IMM[0].wwww > 78: DP4 TEMP[8].x, CONST[2][7], TEMP[3] > 79: DP4 TEMP[9].x, CONST[2][8], TEMP[3] > 80: MOV TEMP[8].y, TEMP[9].xxxx > 81: DP4 TEMP[9].x, CONST[2][10], TEMP[3] > 82: FSEQ TEMP[10].xy, TEMP[9].xxxx, IMM[0].xxxx > 83: SSG TEMP[11].xy, TEMP[8].xyyy > 84: MUL TEMP[11].xy, IMM[0].yyyy, TEMP[11].xyyy > 85: RCP TEMP[9].xy, TEMP[9].xxxx > 86: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[9].xyyy > 87: UCMP TEMP[3].xy, TEMP[10].xyyy, TEMP[11].xyyy, TEMP[8].xyyy > 88: MOV TEMP[8].xy, TEMP[3].xyyy > 89: MOV TEMP[8].w, IMM[0].xxxx > 90: TXL TEMP[8], TEMP[8], SAMP[3], 2D > 91: MUL TEMP[3].xyz, TEMP[8].xyzz, CONST[2][16].xyzz > 92: MOV TEMP[9].xy, TEMP[0].xyyy > 93: TEX TEMP[9], TEMP[9], SAMP[4], 2D > 94: MUL TEMP[10].xyz, TEMP[3].xyzz, TEMP[9].xyzz > 95: DP3 TEMP[11].x, IN[2].xyzz, IN[2].xyzz > 96: RSQ TEMP[11].x, TEMP[11].xxxx > 97: MOV TEMP[12].xy, TEMP[0].xyyy > 98: TEX TEMP[12], TEMP[12], SAMP[5], 2D > 99: FMA TEMP[13].xyz, TEMP[12].xyzz, IMM[4].wwww, IMM[8].xxxx >100: DP3 TEMP[14].x, TEMP[13].xyzz, TEMP[13].xyzz >101: RSQ TEMP[15].x, TEMP[14].xxxx >102: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xxxx >103: RSQ TEMP[7].x, TEMP[7].xxxx >104: MUL TEMP[5].xyz, TEMP[7].xxxx, TEMP[5].xyzz >105: MUL TEMP[14].xyz, TEMP[8].wwww, CONST[2][18].xyzz >106: MUL TEMP[7].x, TEMP[1].xxxx, TEMP[1].xxxx >107: MOV TEMP[8].xy, TEMP[0].xyyy >108: MOV TEMP[8].w, IMM[0].xxxx >109: TXL TEMP[8].xy, TEMP[8], SAMP[6], 2D >110: FMA TEMP[11].xyz, IN[2].xyzz, TEMP[11].xxxx, TEMP[5].xyzz >111: DP3 TEMP[15].x, TEMP[11].xyzz, TEMP[11].xyzz >112: RSQ TEMP[15].x, TEMP[15].xxxx >113: MUL TEMP[11].xyz, TEMP[15].xxxx, TEMP[11].xyzz >114: 
DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[11].xyzz >115: MOV_SAT TEMP[15].x, TEMP[15].xxxx >116: FMA TEMP[7].xy, TEMP[7].xxxx, IMM[8].yyyy, IMM[8].zwww >117: MUL TEMP[16].x, TEMP[7].yyyy, IMM[9].xxxx >118: LG2 TEMP[15].x, TEMP[15].xxxx >119: MUL TEMP[7].x, TEMP[15].xxxx, TEMP[7].xxxx >120: EX2 TEMP[7].x, TEMP[7].xxxx >121: MUL TEMP[7].x, TEMP[16].xxxx, TEMP[7].xxxx >122: ADD TEMP[15].x, -TEMP[1].zzzz, IMM[0].wwww >123: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[11].xyzz >124: MOV_SAT TEMP[11].x, TEMP[11].xxxx >125: ADD TEMP[2].x, -TEMP[11].xxxx, IMM[0].wwww >126: MUL TEMP[11].x, TEMP[2].xxxx, TEMP[2].xxxx >127: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[11].xxxx >128: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[11].xxxx >129: FMA TEMP[11].x, TEMP[15].xxxx, TEMP[2].xxxx, TEMP[1].zzzz >130: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[11].xxxx >131: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[8].xxxx >132: MUL TEMP[2].xyz, TEMP[1].yyyy, TEMP[14].xyzz >133: DP3 TEMP[11].x, TEMP[13].xyzz, TEMP[5].xyzz >134: MOV TEMP[0].x, TEMP[11].xxxx >135: ADD TEMP[13].x, TEMP[9].wwww, TEMP[11].xxxx >136: ADD TEMP[13].x, TEMP[13].xxxx, IMM[8].xxxx >137: FSNE TEMP[14].x, TEMP[9].wwww, IMM[0].xxxx >138: UIF TEMP[14].xxxx :0 >139: RCP TEMP[14].x, TEMP[9].wwww >140: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[14].xxxx >141: ELSE :0 >142: SSG TEMP[13].x, TEMP[13].xxxx >143: MUL TEMP[14].x, IMM[0].yyyy, TEMP[13].xxxx >144: ENDIF >145: MOV_SAT TEMP[13].x, TEMP[14].xxxx >146: FMA TEMP[7].xyz, TEMP[2].xyzz, TEMP[7].xxxx, TEMP[10].xyzz >147: MUL TEMP[2].xyz, TEMP[13].xxxx, TEMP[7].xyzz >148: FSLT TEMP[7].x, IMM[0].xxxx, TEMP[1].wwww >149: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx >150: INEG TEMP[7].x, TEMP[7].xxxx >151: USNE TEMP[7].x, TEMP[7].xxxx, IMM[5].wwww >152: UIF TEMP[7].xxxx :0 >153: FSLT TEMP[7].x, TEMP[12].wwww, IMM[9].yyyy >154: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx >155: INEG TEMP[7].x, TEMP[7].xxxx >156: ADD TEMP[10].xyz, TEMP[10].xyzz, TEMP[10].xyzz >157: MAX TEMP[12].x, TEMP[9].zzzz, TEMP[9].yyyy >158: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[9].xxxx >159: FSEQ TEMP[13].xyz, TEMP[12].xxxx, IMM[0].xxxx >160: SSG TEMP[14].xyz, TEMP[9].xyzz >161: MUL TEMP[14].xyz, IMM[0].yyyy, TEMP[14].xyzz >162: RCP TEMP[12].xyz, TEMP[12].xxxx >163: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xyzz >164: UCMP TEMP[9].xyz, TEMP[13].xyzz, TEMP[14].xyzz, TEMP[9].xyzz >165: MOV_SAT TEMP[9].xyz, TEMP[9].xyzz >166: MUL TEMP[5].xyz, TEMP[9].xyzz, TEMP[9].xyzz >167: MOV_SAT TEMP[9].xyz, TEMP[3].xyzz >168: MUL TEMP[3].xyz, TEMP[9].xyzz, TEMP[5].xyzz >169: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[9].zzzz >170: USNE TEMP[5].x, TEMP[7].xxxx, IMM[5].wwww >171: UIF TEMP[5].xxxx :0 >172: MOV TEMP[5].x, TEMP[10].xxxx >173: ELSE :0 >174: MOV TEMP[5].x, TEMP[3].xxxx >175: ENDIF >176: MOV TEMP[5].x, TEMP[5].xxxx >177: USNE TEMP[9].x, TEMP[7].xxxx, IMM[5].wwww >178: UIF TEMP[9].xxxx :0 >179: MOV TEMP[9].x, TEMP[10].yyyy >180: ELSE :0 >181: MOV TEMP[9].x, TEMP[3].yyyy >182: ENDIF >183: MOV TEMP[5].y, TEMP[9].xxxx >184: USNE TEMP[7].x, TEMP[7].xxxx, IMM[5].wwww >185: UIF TEMP[7].xxxx :0 >186: MOV TEMP[7].x, TEMP[10].zzzz >187: ELSE :0 >188: MOV TEMP[7].x, TEMP[3].zzzz >189: ENDIF >190: MOV TEMP[5].z, TEMP[7].xxxx >191: ADD TEMP[7].x, TEMP[1].wwww, IMM[4].yyyy >192: MOV_SAT TEMP[7].x, TEMP[7].xxxx >193: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[5].xyzz >194: ADD TEMP[9].xy, -TEMP[11].xxxx, IMM[10].xyyy >195: MOV_SAT TEMP[9].xy, TEMP[9].xyyy >196: FMA TEMP[7].xyz, TEMP[7].xyzz, TEMP[9].xxxx, TEMP[2].xyzz >197: MIN TEMP[1].x, TEMP[1].wwww, IMM[0].zzzz >198: MUL TEMP[3].xyz, TEMP[1].xxxx, TEMP[5].xyzz >199: MUL 
TEMP[3].xyz, TEMP[9].yyyy, TEMP[3].xyzz >200: ADD TEMP[1].x, TEMP[11].xxxx, IMM[4].zzzz >201: MOV_SAT TEMP[0].x, TEMP[1].xxxx >202: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[0].xxxx, TEMP[7].xyzz >203: ENDIF >204: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xxxx >205: MUL TEMP[0].xyz, TEMP[6].xxxx, TEMP[3].xyzz >206: ADD TEMP[1].x, TEMP[8].yyyy, IMM[9].wwww >207: MOV_SAT TEMP[1].x, TEMP[1].xxxx >208: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >209: MOV TEMP[0].w, IMM[0].wwww >210: MOV OUT[0], TEMP[0] >211: END >radeonsi: Compiling shader 80 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 124) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 140) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %52 = call float @llvm.SI.load.const(<16 x i8> %35, i32 268) > %53 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %54 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %55 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %56 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %57 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %58 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %59 = call float @llvm.SI.load.const(<16 
x i8> %35, i32 376) > %60 = call float @llvm.SI.load.const(<16 x i8> %35, i32 380) > %61 = call float @llvm.SI.load.const(<16 x i8> %35, i32 384) > %62 = call float @llvm.SI.load.const(<16 x i8> %35, i32 388) > %63 = call float @llvm.SI.load.const(<16 x i8> %35, i32 392) > %64 = call float @llvm.SI.load.const(<16 x i8> %35, i32 396) > %65 = call float @llvm.SI.load.const(<16 x i8> %35, i32 400) > %66 = call float @llvm.SI.load.const(<16 x i8> %35, i32 404) > %67 = call float @llvm.SI.load.const(<16 x i8> %35, i32 408) > %68 = call float @llvm.SI.load.const(<16 x i8> %35, i32 412) > %69 = call float @llvm.SI.load.const(<16 x i8> %35, i32 416) > %70 = call float @llvm.SI.load.const(<16 x i8> %35, i32 420) > %71 = call float @llvm.SI.load.const(<16 x i8> %35, i32 424) > %72 = call float @llvm.SI.load.const(<16 x i8> %35, i32 428) > %73 = call float @llvm.SI.load.const(<16 x i8> %35, i32 432) > %74 = call float @llvm.SI.load.const(<16 x i8> %35, i32 436) > %75 = call float @llvm.SI.load.const(<16 x i8> %35, i32 440) > %76 = call float @llvm.SI.load.const(<16 x i8> %35, i32 444) > %77 = call float @llvm.SI.load.const(<16 x i8> %35, i32 464) > %78 = call float @llvm.SI.load.const(<16 x i8> %35, i32 640) > %79 = call float @llvm.SI.load.const(<16 x i8> %35, i32 644) > %80 = call float @llvm.SI.load.const(<16 x i8> %35, i32 648) > %81 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %82 = load <8 x i32>, <8 x i32> addrspace(2)* %81, align 32, !tbaa !0 > %83 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %84 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %83, i64 0, i64 3 > %85 = load <4 x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0 > %86 = extractelement <8 x i32> %82, i32 7 > %87 = extractelement <4 x i32> %85, i32 0 > %88 = and i32 %87, %86 > %89 = insertelement <4 x i32> %85, i32 %88, i32 0 > %90 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %91 = load <8 x i32>, <8 x i32> addrspace(2)* %90, align 32, !tbaa !0 > %92 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %93 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %92, i64 0, i64 7 > %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0 > %95 = extractelement <8 x i32> %91, i32 7 > %96 = extractelement <4 x i32> %94, i32 0 > %97 = and i32 %96, %95 > %98 = insertelement <4 x i32> %94, i32 %97, i32 0 > %99 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %100 = load <8 x i32>, <8 x i32> addrspace(2)* %99, align 32, !tbaa !0 > %101 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %102 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %101, i64 0, i64 11 > %103 = load <4 x i32>, <4 x i32> addrspace(2)* %102, align 16, !tbaa !0 > %104 = extractelement <8 x i32> %100, i32 7 > %105 = extractelement <4 x i32> %103, i32 0 > %106 = and i32 %105, %104 > %107 = insertelement <4 x i32> %103, i32 %106, i32 0 > %108 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %109 = load <8 x i32>, <8 x i32> addrspace(2)* %108, align 32, !tbaa !0 > %110 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %111 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %110, i64 0, i64 15 > %112 = load <4 x i32>, <4 x i32> addrspace(2)* %111, align 16, !tbaa !0 > %113 = extractelement <8 x i32> %109, i32 7 > %114 = extractelement <4 x i32> 
%112, i32 0 > %115 = and i32 %114, %113 > %116 = insertelement <4 x i32> %112, i32 %115, i32 0 > %117 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %118 = load <8 x i32>, <8 x i32> addrspace(2)* %117, align 32, !tbaa !0 > %119 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %120 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %119, i64 0, i64 19 > %121 = load <4 x i32>, <4 x i32> addrspace(2)* %120, align 16, !tbaa !0 > %122 = extractelement <8 x i32> %118, i32 7 > %123 = extractelement <4 x i32> %121, i32 0 > %124 = and i32 %123, %122 > %125 = insertelement <4 x i32> %121, i32 %124, i32 0 > %126 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %127 = load <8 x i32>, <8 x i32> addrspace(2)* %126, align 32, !tbaa !0 > %128 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %129 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %128, i64 0, i64 23 > %130 = load <4 x i32>, <4 x i32> addrspace(2)* %129, align 16, !tbaa !0 > %131 = extractelement <8 x i32> %127, i32 7 > %132 = extractelement <4 x i32> %130, i32 0 > %133 = and i32 %132, %131 > %134 = insertelement <4 x i32> %130, i32 %133, i32 0 > %135 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %136 = load <8 x i32>, <8 x i32> addrspace(2)* %135, align 32, !tbaa !0 > %137 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %138 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %137, i64 0, i64 27 > %139 = load <4 x i32>, <4 x i32> addrspace(2)* %138, align 16, !tbaa !0 > %140 = extractelement <8 x i32> %136, i32 7 > %141 = extractelement <4 x i32> %139, i32 0 > %142 = and i32 %141, %140 > %143 = insertelement <4 x i32> %139, i32 %142, i32 0 > %144 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %145 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %146 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %147 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %148 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %149 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %150 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %151 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %152 = fcmp oeq float %146, 0.000000e+00 > %153 = fcmp oeq float %146, 0.000000e+00 > %154 = fcmp ogt float %144, 0.000000e+00 > %155 = select i1 %154, float 1.000000e+00, float %144 > %156 = fcmp oge float %155, 0.000000e+00 > %157 = fcmp ogt float %145, 0.000000e+00 > %158 = select i1 %157, float 1.000000e+00, float %145 > %159 = fcmp oge float %158, 0.000000e+00 > %.op = fmul float %155, 0x4600000000000000 > %160 = select i1 %156, float %.op, float 0xC600000000000000 > %.op81 = fmul float %158, 0x4600000000000000 > %161 = select i1 %159, float %.op81, float 0xC600000000000000 > %162 = fdiv float 1.000000e+00, %146 > %163 = fmul float %144, %162 > %164 = fmul float %145, %162 > %165 = select i1 %152, float %160, float %163 > %166 = select i1 %153, float %161, float %164 > %167 = fcmp oeq float %146, 0.000000e+00 > %168 = fcmp oeq float %146, 0.000000e+00 > %169 = fcmp ogt float %147, 0.000000e+00 > %170 = select i1 %169, float 1.000000e+00, float %147 > %171 = fcmp oge float %170, 0.000000e+00 > %172 = fcmp ogt float %148, 0.000000e+00 > %173 = select i1 %172, float 
1.000000e+00, float %148 > %174 = fcmp oge float %173, 0.000000e+00 > %.op82 = fmul float %170, 0x4600000000000000 > %175 = select i1 %171, float %.op82, float 0xC600000000000000 > %.op83 = fmul float %173, 0x4600000000000000 > %176 = select i1 %174, float %.op83, float 0xC600000000000000 > %177 = fdiv float 1.000000e+00, %146 > %178 = fmul float %147, %177 > %179 = fmul float %148, %177 > %180 = select i1 %167, float %175, float %178 > %181 = select i1 %168, float %176, float %179 > %182 = bitcast float %165 to i32 > %183 = bitcast float %166 to i32 > %184 = insertelement <2 x i32> undef, i32 %182, i32 0 > %185 = insertelement <2 x i32> %184, i32 %183, i32 1 > %186 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %185, <8 x i32> %82, <4 x i32> %89, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %187 = extractelement <4 x float> %186, i32 0 > %188 = fmul float %180, %187 > %189 = fmul float %181, %187 > %190 = bitcast float %165 to i32 > %191 = bitcast float %166 to i32 > %192 = insertelement <2 x i32> undef, i32 %190, i32 0 > %193 = insertelement <2 x i32> %192, i32 %191, i32 1 > %194 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %193, <8 x i32> %91, <4 x i32> %98, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %195 = extractelement <4 x float> %194, i32 0 > %196 = extractelement <4 x float> %194, i32 1 > %197 = extractelement <4 x float> %194, i32 2 > %198 = extractelement <4 x float> %194, i32 3 > %199 = fcmp oge float %198, 5.000000e-01 > %200 = select i1 %199, float 1.000000e+00, float 0.000000e+00 > %201 = call float @llvm.fma.f32(float %200, float %78, float %188) > %202 = call float @llvm.fma.f32(float %200, float %79, float %189) > %203 = call float @llvm.fma.f32(float %200, float %80, float %187) > %204 = fmul float %61, %201 > %205 = fmul float %62, %202 > %206 = fadd float %204, %205 > %207 = fmul float %63, %203 > %208 = fadd float %206, %207 > %209 = fadd float %208, %64 > %210 = fmul float %65, %201 > %211 = fmul float %66, %202 > %212 = fadd float %210, %211 > %213 = fmul float %67, %203 > %214 = fadd float %212, %213 > %215 = fadd float %214, %68 > %216 = fmul float %69, %201 > %217 = fmul float %70, %202 > %218 = fadd float %216, %217 > %219 = fmul float %71, %203 > %220 = fadd float %218, %219 > %221 = fadd float %220, %72 > %222 = fmul float %73, %201 > %223 = fmul float %74, %202 > %224 = fadd float %222, %223 > %225 = fmul float %75, %203 > %226 = fadd float %224, %225 > %227 = fadd float %226, %76 > %228 = fcmp oeq float %227, 0.000000e+00 > %229 = fcmp oeq float %227, 0.000000e+00 > %230 = fcmp oeq float %227, 0.000000e+00 > %231 = fcmp ogt float %209, 0.000000e+00 > %232 = select i1 %231, float 1.000000e+00, float %209 > %233 = fcmp oge float %232, 0.000000e+00 > %234 = fcmp ogt float %215, 0.000000e+00 > %235 = select i1 %234, float 1.000000e+00, float %215 > %236 = fcmp oge float %235, 0.000000e+00 > %237 = fcmp ogt float %221, 0.000000e+00 > %238 = select i1 %237, float 1.000000e+00, float %221 > %239 = fcmp oge float %238, 0.000000e+00 > %.op84 = fmul float %232, 0x4600000000000000 > %240 = select i1 %233, float %.op84, float 0xC600000000000000 > %.op85 = fmul float %235, 0x4600000000000000 > %241 = select i1 %236, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %238, 0x4600000000000000 > %242 = select i1 %239, float %.op86, float 0xC600000000000000 > %243 = fdiv float 1.000000e+00, %227 > %244 = fmul float %209, %243 > %245 = fmul float %215, %243 > %246 = fmul float %221, %243 > %247 = select i1 %228, 
float %240, float %244 > %248 = select i1 %229, float %241, float %245 > %249 = select i1 %230, float %242, float %246 > %250 = call float @llvm.fma.f32(float %247, float 5.000000e-01, float 5.000000e-01) > %251 = call float @llvm.fma.f32(float %248, float -5.000000e-01, float 5.000000e-01) > %252 = call float @llvm.fma.f32(float %247, float 5.000000e-01, float 5.000000e-01) > %253 = call float @llvm.fma.f32(float %248, float -5.000000e-01, float 5.000000e-01) > %254 = fsub float 1.000000e+00, %249 > %255 = call float @llvm.fma.f32(float %59, float 5.000000e-01, float %252) > %256 = call float @llvm.fma.f32(float %60, float -5.000000e-01, float %253) > %257 = call float @llvm.fma.f32(float %59, float -5.000000e-01, float %252) > %258 = call float @llvm.fma.f32(float %60, float -5.000000e-01, float %253) > %259 = bitcast float %254 to i32 > %260 = bitcast float %257 to i32 > %261 = bitcast float %258 to i32 > %262 = bitcast float %77 to i32 > %263 = insertelement <4 x i32> undef, i32 %259, i32 0 > %264 = insertelement <4 x i32> %263, i32 %260, i32 1 > %265 = insertelement <4 x i32> %264, i32 %261, i32 2 > %266 = insertelement <4 x i32> %265, i32 %262, i32 3 > %267 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %266, <8 x i32> %100, <4 x i32> %107, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %268 = extractelement <4 x float> %267, i32 0 > %269 = bitcast float %254 to i32 > %270 = bitcast float %255 to i32 > %271 = bitcast float %256 to i32 > %272 = bitcast float %77 to i32 > %273 = insertelement <4 x i32> undef, i32 %269, i32 0 > %274 = insertelement <4 x i32> %273, i32 %270, i32 1 > %275 = insertelement <4 x i32> %274, i32 %271, i32 2 > %276 = insertelement <4 x i32> %275, i32 %272, i32 3 > %277 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %276, <8 x i32> %100, <4 x i32> %107, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %278 = extractelement <4 x float> %277, i32 0 > %279 = call float @llvm.fma.f32(float %59, float 5.000000e-01, float %252) > %280 = call float @llvm.fma.f32(float %60, float 5.000000e-01, float %253) > %281 = call float @llvm.fma.f32(float %59, float -5.000000e-01, float %250) > %282 = call float @llvm.fma.f32(float %60, float 5.000000e-01, float %251) > %283 = bitcast float %254 to i32 > %284 = bitcast float %281 to i32 > %285 = bitcast float %282 to i32 > %286 = bitcast float %77 to i32 > %287 = insertelement <4 x i32> undef, i32 %283, i32 0 > %288 = insertelement <4 x i32> %287, i32 %284, i32 1 > %289 = insertelement <4 x i32> %288, i32 %285, i32 2 > %290 = insertelement <4 x i32> %289, i32 %286, i32 3 > %291 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %290, <8 x i32> %100, <4 x i32> %107, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %292 = extractelement <4 x float> %291, i32 0 > %293 = bitcast float %254 to i32 > %294 = bitcast float %279 to i32 > %295 = bitcast float %280 to i32 > %296 = bitcast float %77 to i32 > %297 = insertelement <4 x i32> undef, i32 %293, i32 0 > %298 = insertelement <4 x i32> %297, i32 %294, i32 1 > %299 = insertelement <4 x i32> %298, i32 %295, i32 2 > %300 = insertelement <4 x i32> %299, i32 %296, i32 3 > %301 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %300, <8 x i32> %100, <4 x i32> %107, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %302 = extractelement <4 x float> %301, i32 0 > %303 = fadd float %268, %278 > %304 = fadd float %292, %303 > %305 = fadd float %302, %304 > %306 = fsub float -0.000000e+00, %305 > %307 = call float 
@llvm.fma.f32(float %306, float 2.500000e-01, float 1.000000e+00) > %308 = fsub float 1.000000e+00, %52 > %309 = call float @llvm.fma.f32(float %52, float %307, float %308) > %310 = fsub float %53, %188 > %311 = fsub float %54, %189 > %312 = fsub float %55, %187 > %313 = fmul float %310, %36 > %314 = fmul float %311, %36 > %315 = fmul float %312, %36 > %316 = fmul float %25, %313 > %317 = fmul float %26, %314 > %318 = fadd float %317, %316 > %319 = fmul float %27, %315 > %320 = fadd float %318, %319 > %321 = fmul float %28, %313 > %322 = fmul float %29, %314 > %323 = fadd float %322, %321 > %324 = fmul float %30, %315 > %325 = fadd float %323, %324 > %326 = fmul float %31, %313 > %327 = fmul float %32, %314 > %328 = fadd float %327, %326 > %329 = fmul float %33, %315 > %330 = fadd float %328, %329 > %331 = fmul float %320, %320 > %332 = fmul float %325, %325 > %333 = fadd float %332, %331 > %334 = fmul float %330, %330 > %335 = fadd float %333, %334 > %336 = call float @llvm.minnum.f32(float %335, float 1.000000e+00) > %337 = fsub float 1.000000e+00, %336 > %338 = fmul float %337, %337 > %339 = fmul float %37, %188 > %340 = fmul float %38, %189 > %341 = fadd float %339, %340 > %342 = fmul float %39, %187 > %343 = fadd float %341, %342 > %344 = fadd float %343, %40 > %345 = fmul float %41, %188 > %346 = fmul float %42, %189 > %347 = fadd float %345, %346 > %348 = fmul float %43, %187 > %349 = fadd float %347, %348 > %350 = fadd float %349, %44 > %351 = fmul float %45, %188 > %352 = fmul float %46, %189 > %353 = fadd float %351, %352 > %354 = fmul float %47, %187 > %355 = fadd float %353, %354 > %356 = fadd float %355, %48 > %357 = fcmp oeq float %356, 0.000000e+00 > %358 = fcmp oeq float %356, 0.000000e+00 > %359 = fcmp ogt float %344, 0.000000e+00 > %360 = select i1 %359, float 1.000000e+00, float %344 > %361 = fcmp oge float %360, 0.000000e+00 > %362 = fcmp ogt float %350, 0.000000e+00 > %363 = select i1 %362, float 1.000000e+00, float %350 > %364 = fcmp oge float %363, 0.000000e+00 > %.op87 = fmul float %360, 0x4600000000000000 > %365 = select i1 %361, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %363, 0x4600000000000000 > %366 = select i1 %364, float %.op88, float 0xC600000000000000 > %367 = fdiv float 1.000000e+00, %356 > %368 = fmul float %344, %367 > %369 = fmul float %350, %367 > %370 = select i1 %357, float %365, float %368 > %371 = select i1 %358, float %366, float %369 > %372 = bitcast float %370 to i32 > %373 = bitcast float %371 to i32 > %374 = insertelement <4 x i32> undef, i32 %372, i32 0 > %375 = insertelement <4 x i32> %374, i32 %373, i32 1 > %376 = insertelement <4 x i32> %375, i32 0, i32 2 > %377 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %376, <8 x i32> %109, <4 x i32> %116, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %378 = extractelement <4 x float> %377, i32 0 > %379 = extractelement <4 x float> %377, i32 1 > %380 = extractelement <4 x float> %377, i32 2 > %381 = extractelement <4 x float> %377, i32 3 > %382 = fmul float %378, %49 > %383 = fmul float %379, %50 > %384 = fmul float %380, %51 > %385 = bitcast float %165 to i32 > %386 = bitcast float %166 to i32 > %387 = insertelement <2 x i32> undef, i32 %385, i32 0 > %388 = insertelement <2 x i32> %387, i32 %386, i32 1 > %389 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %388, <8 x i32> %118, <4 x i32> %125, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %390 = extractelement <4 x float> %389, i32 0 > %391 = extractelement <4 x float> %389, i32 1 > 
%392 = extractelement <4 x float> %389, i32 2 > %393 = extractelement <4 x float> %389, i32 3 > %394 = fmul float %382, %390 > %395 = fmul float %383, %391 > %396 = fmul float %384, %392 > %397 = fmul float %149, %149 > %398 = fmul float %150, %150 > %399 = fadd float %398, %397 > %400 = fmul float %151, %151 > %401 = fadd float %399, %400 > %402 = call float @llvm.AMDGPU.rsq.clamped.f32(float %401) > %403 = bitcast float %165 to i32 > %404 = bitcast float %166 to i32 > %405 = insertelement <2 x i32> undef, i32 %403, i32 0 > %406 = insertelement <2 x i32> %405, i32 %404, i32 1 > %407 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %406, <8 x i32> %127, <4 x i32> %134, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %408 = extractelement <4 x float> %407, i32 0 > %409 = extractelement <4 x float> %407, i32 1 > %410 = extractelement <4 x float> %407, i32 2 > %411 = extractelement <4 x float> %407, i32 3 > %412 = call float @llvm.fma.f32(float %408, float 2.000000e+00, float -1.000000e+00) > %413 = call float @llvm.fma.f32(float %409, float 2.000000e+00, float -1.000000e+00) > %414 = call float @llvm.fma.f32(float %410, float 2.000000e+00, float -1.000000e+00) > %415 = fmul float %412, %412 > %416 = fmul float %413, %413 > %417 = fadd float %416, %415 > %418 = fmul float %414, %414 > %419 = fadd float %417, %418 > %420 = call float @llvm.AMDGPU.rsq.clamped.f32(float %419) > %421 = fmul float %412, %420 > %422 = fmul float %413, %420 > %423 = fmul float %414, %420 > %424 = call float @llvm.AMDGPU.rsq.clamped.f32(float %335) > %425 = fmul float %424, %320 > %426 = fmul float %424, %325 > %427 = fmul float %424, %330 > %428 = fmul float %381, %56 > %429 = fmul float %381, %57 > %430 = fmul float %381, %58 > %431 = fmul float %195, %195 > %432 = bitcast float %165 to i32 > %433 = bitcast float %166 to i32 > %434 = insertelement <4 x i32> undef, i32 %432, i32 0 > %435 = insertelement <4 x i32> %434, i32 %433, i32 1 > %436 = insertelement <4 x i32> %435, i32 0, i32 2 > %437 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %436, <8 x i32> %136, <4 x i32> %143, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %438 = extractelement <4 x float> %437, i32 0 > %439 = extractelement <4 x float> %437, i32 1 > %440 = call float @llvm.fma.f32(float %149, float %402, float %425) > %441 = call float @llvm.fma.f32(float %150, float %402, float %426) > %442 = call float @llvm.fma.f32(float %151, float %402, float %427) > %443 = fmul float %440, %440 > %444 = fmul float %441, %441 > %445 = fadd float %444, %443 > %446 = fmul float %442, %442 > %447 = fadd float %445, %446 > %448 = call float @llvm.AMDGPU.rsq.clamped.f32(float %447) > %449 = fmul float %448, %440 > %450 = fmul float %448, %441 > %451 = fmul float %448, %442 > %452 = fmul float %421, %449 > %453 = fmul float %422, %450 > %454 = fadd float %453, %452 > %455 = fmul float %423, %451 > %456 = fadd float %454, %455 > %457 = call float @llvm.AMDGPU.clamp.(float %456, float 0.000000e+00, float 1.000000e+00) > %458 = call float @llvm.fma.f32(float %431, float 4.096000e+03, float 0x3F70624DE0000000) > %459 = call float @llvm.fma.f32(float %431, float 4.096000e+03, float 0x4000083120000000) > %460 = fmul float %459, 1.250000e-01 > %461 = call float @llvm.log2.f32(float %457) > %462 = fmul float %461, %458 > %463 = call float @llvm.exp2.f32(float %462) > %464 = fmul float %460, %463 > %465 = fsub float 1.000000e+00, %197 > %466 = fmul float %425, %449 > %467 = fmul float %426, %450 > %468 = fadd float %467, %466 > %469 
= fmul float %427, %451 > %470 = fadd float %468, %469 > %471 = call float @llvm.AMDGPU.clamp.(float %470, float 0.000000e+00, float 1.000000e+00) > %472 = fsub float 1.000000e+00, %471 > %473 = fmul float %472, %472 > %474 = fmul float %473, %473 > %475 = fmul float %472, %474 > %476 = call float @llvm.fma.f32(float %465, float %475, float %197) > %477 = fmul float %464, %476 > %478 = fmul float %428, %438 > %479 = fmul float %429, %438 > %480 = fmul float %430, %438 > %481 = fmul float %196, %478 > %482 = fmul float %196, %479 > %483 = fmul float %196, %480 > %484 = fmul float %421, %425 > %485 = fmul float %422, %426 > %486 = fadd float %485, %484 > %487 = fmul float %423, %427 > %488 = fadd float %486, %487 > %489 = fadd float %393, %488 > %490 = fadd float %489, -1.000000e+00 > %491 = fcmp une float %393, 0.000000e+00 > br i1 %491, label %IF, label %ELSE > >IF: ; preds = %main_body > %492 = fdiv float 1.000000e+00, %393 > %493 = fmul float %490, %492 > br label %ENDIF > >ELSE: ; preds = %main_body > %494 = fcmp ogt float %490, 0.000000e+00 > %495 = select i1 %494, float 1.000000e+00, float %490 > %496 = fcmp oge float %495, 0.000000e+00 > %.op89 = fmul float %495, 0x4600000000000000 > %497 = select i1 %496, float %.op89, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp56.0 = phi float [ %493, %IF ], [ %497, %ELSE ] > %498 = call float @llvm.AMDGPU.clamp.(float %temp56.0, float 0.000000e+00, float 1.000000e+00) > %499 = call float @llvm.fma.f32(float %481, float %477, float %394) > %500 = call float @llvm.fma.f32(float %482, float %477, float %395) > %501 = call float @llvm.fma.f32(float %483, float %477, float %396) > %502 = fmul float %498, %499 > %503 = fmul float %498, %500 > %504 = fmul float %498, %501 > %505 = fcmp ogt float %198, 0.000000e+00 > br i1 %505, label %IF69, label %ENDIF68 > >IF69: ; preds = %ENDIF > %506 = fcmp olt float %411, 0x3FE0505060000000 > %507 = fadd float %394, %394 > %508 = fadd float %395, %395 > %509 = fadd float %396, %396 > %510 = call float @llvm.maxnum.f32(float %392, float %391) > %511 = call float @llvm.maxnum.f32(float %510, float %390) > %512 = fcmp oeq float %511, 0.000000e+00 > %513 = fcmp oeq float %511, 0.000000e+00 > %514 = fcmp oeq float %511, 0.000000e+00 > %515 = fcmp ogt float %390, 0.000000e+00 > %516 = select i1 %515, float 1.000000e+00, float %390 > %517 = fcmp oge float %516, 0.000000e+00 > %518 = fcmp ogt float %391, 0.000000e+00 > %519 = select i1 %518, float 1.000000e+00, float %391 > %520 = fcmp oge float %519, 0.000000e+00 > %521 = fcmp ogt float %392, 0.000000e+00 > %522 = select i1 %521, float 1.000000e+00, float %392 > %523 = fcmp oge float %522, 0.000000e+00 > %.op90 = fmul float %516, 0x4600000000000000 > %524 = select i1 %517, float %.op90, float 0xC600000000000000 > %.op91 = fmul float %519, 0x4600000000000000 > %525 = select i1 %520, float %.op91, float 0xC600000000000000 > %.op92 = fmul float %522, 0x4600000000000000 > %526 = select i1 %523, float %.op92, float 0xC600000000000000 > %527 = fdiv float 1.000000e+00, %511 > %528 = fmul float %390, %527 > %529 = fmul float %391, %527 > %530 = fmul float %392, %527 > %531 = select i1 %512, float %524, float %528 > %532 = select i1 %513, float %525, float %529 > %533 = select i1 %514, float %526, float %530 > %534 = call float @llvm.AMDGPU.clamp.(float %531, float 0.000000e+00, float 1.000000e+00) > %535 = call float @llvm.AMDGPU.clamp.(float %532, float 0.000000e+00, float 1.000000e+00) > %536 = call float @llvm.AMDGPU.clamp.(float %533, 
float 0.000000e+00, float 1.000000e+00) > %537 = fmul float %534, %534 > %538 = fmul float %535, %535 > %539 = fmul float %536, %536 > %540 = call float @llvm.AMDGPU.clamp.(float %382, float 0.000000e+00, float 1.000000e+00) > %541 = call float @llvm.AMDGPU.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) > %542 = call float @llvm.AMDGPU.clamp.(float %384, float 0.000000e+00, float 1.000000e+00) > %543 = fmul float %540, %537 > %544 = fmul float %541, %538 > %545 = fmul float %542, %539 > %546 = fmul float %543, 0x3FC3333340000000 > %547 = fmul float %544, 0x3FC3333340000000 > %548 = fmul float %545, 0x3FC3333340000000 > %. = select i1 %506, float %507, float %546 > %temp36.0 = select i1 %506, float %508, float %547 > %.80 = select i1 %506, float %509, float %548 > %549 = fadd float %198, -5.000000e-01 > %550 = call float @llvm.AMDGPU.clamp.(float %549, float 0.000000e+00, float 1.000000e+00) > %551 = fmul float %550, %. > %552 = fmul float %550, %temp36.0 > %553 = fmul float %550, %.80 > %554 = fsub float 2.500000e-01, %488 > %555 = fsub float 1.000000e+00, %488 > %556 = call float @llvm.AMDGPU.clamp.(float %554, float 0.000000e+00, float 1.000000e+00) > %557 = call float @llvm.AMDGPU.clamp.(float %555, float 0.000000e+00, float 1.000000e+00) > %558 = call float @llvm.fma.f32(float %551, float %556, float %502) > %559 = call float @llvm.fma.f32(float %552, float %556, float %503) > %560 = call float @llvm.fma.f32(float %553, float %556, float %504) > %561 = call float @llvm.minnum.f32(float %198, float 5.000000e-01) > %562 = fmul float %561, %. > %563 = fmul float %561, %temp36.0 > %564 = fmul float %561, %.80 > %565 = fmul float %557, %562 > %566 = fmul float %557, %563 > %567 = fmul float %557, %564 > %568 = fadd float %488, 2.500000e-01 > %569 = call float @llvm.AMDGPU.clamp.(float %568, float 0.000000e+00, float 1.000000e+00) > %570 = call float @llvm.fma.f32(float %565, float %569, float %558) > %571 = call float @llvm.fma.f32(float %566, float %569, float %559) > %572 = call float @llvm.fma.f32(float %567, float %569, float %560) > br label %ENDIF68 > >ENDIF68: ; preds = %ENDIF, %IF69 > %temp10.0 = phi float [ %572, %IF69 ], [ %504, %ENDIF ] > %temp9.0 = phi float [ %571, %IF69 ], [ %503, %ENDIF ] > %temp8.0 = phi float [ %570, %IF69 ], [ %502, %ENDIF ] > %573 = fmul float %temp8.0, %338 > %574 = fmul float %temp9.0, %338 > %575 = fmul float %temp10.0, %338 > %576 = fmul float %309, %573 > %577 = fmul float %309, %574 > %578 = fmul float %309, %575 > %579 = fadd float %439, 0x3FD54FDF40000000 > %580 = call float @llvm.AMDGPU.clamp.(float %579, float 0.000000e+00, float 1.000000e+00) > %581 = fmul float %580, %576 > %582 = fmul float %580, %577 > %583 = fmul float %580, %578 > %584 = bitcast float %5 to i32 > %585 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %584, 10 > %586 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %585, float %581, 11 > %587 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %586, float %582, 12 > %588 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> 
%587, float %583, 13 > %589 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %588, float 1.000000e+00, 14 > %590 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %589, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %590 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..55] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 176, 336, 848} >IMM[1] FLT32 { 0.0000, 1.0000, -0.5000, 0.5000} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {864, 880, 736, 752} >IMM[4] UINT32 {768, 784, 0, 0} > 0: MUL TEMP[0].xy, CONST[1][11].wwww, CONST[1][21].xyyy > 1: ADD TEMP[1].xyz, IN[0].xyzz, IMM[1].xxyy > 2: MOV TEMP[0].z, CONST[1][11].wwww > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz > 4: MUL TEMP[1].xy, CONST[1][21].zzzz, CONST[1][21].xyyy > 5: MOV TEMP[1].z, -CONST[1][21].zzzz > 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[0].xyzz > 7: FSLT TEMP[2].x, IN[0].zzzz, IMM[1].zzzz > 8: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 9: INEG TEMP[2].x, TEMP[2].xxxx > 10: USNE TEMP[3].x, TEMP[2].xxxx, IMM[0].xxxx > 11: UIF TEMP[3].xxxx :0 > 12: MOV TEMP[3].x, TEMP[1].xxxx > 13: ELSE :0 > 14: MOV TEMP[3].x, TEMP[0].xxxx > 15: ENDIF > 16: MOV TEMP[3].x, TEMP[3].xxxx > 17: USNE TEMP[4].x, TEMP[2].xxxx, IMM[0].xxxx > 18: UIF TEMP[4].xxxx :0 > 19: MOV TEMP[4].x, TEMP[1].yyyy > 20: ELSE :0 > 21: MOV TEMP[4].x, TEMP[0].yyyy > 22: ENDIF > 23: MOV TEMP[3].y, TEMP[4].xxxx > 24: USNE TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx > 25: UIF TEMP[2].xxxx :0 > 26: MOV TEMP[2].x, TEMP[1].zzzz > 27: ELSE :0 > 28: MOV TEMP[2].x, TEMP[0].zzzz > 29: ENDIF > 30: MOV TEMP[3].z, TEMP[2].xxxx > 31: FSLT TEMP[2].x, 
IMM[1].wwww, IN[0].zzzz > 32: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 33: INEG TEMP[2].x, TEMP[2].xxxx > 34: MUL TEMP[1].xyz, IN[0].xyzz, CONST[1][11].xyzz > 35: USNE TEMP[4].x, TEMP[2].xxxx, IMM[0].xxxx > 36: UIF TEMP[4].xxxx :0 > 37: MOV TEMP[4].x, TEMP[1].xxxx > 38: ELSE :0 > 39: MOV TEMP[4].x, TEMP[3].xxxx > 40: ENDIF > 41: MOV TEMP[4].x, TEMP[4].xxxx > 42: USNE TEMP[5].x, TEMP[2].xxxx, IMM[0].xxxx > 43: UIF TEMP[5].xxxx :0 > 44: MOV TEMP[5].x, TEMP[1].yyyy > 45: ELSE :0 > 46: MOV TEMP[5].x, TEMP[3].yyyy > 47: ENDIF > 48: MOV TEMP[4].y, TEMP[5].xxxx > 49: USNE TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx > 50: UIF TEMP[2].xxxx :0 > 51: MOV TEMP[2].x, TEMP[1].zzzz > 52: ELSE :0 > 53: MOV TEMP[2].x, TEMP[3].zzzz > 54: ENDIF > 55: MOV TEMP[4].z, TEMP[2].xxxx > 56: MOV TEMP[0].xyz, TEMP[4].xyzx > 57: MOV TEMP[0].w, IMM[1].yyyy > 58: DP4 TEMP[1].x, CONST[1][53], TEMP[0] > 59: DP4 TEMP[2].x, CONST[1][54], TEMP[0] > 60: MOV TEMP[1].y, TEMP[2].xxxx > 61: DP4 TEMP[0].x, CONST[1][55], TEMP[0] > 62: MOV TEMP[1].z, TEMP[0].xxxx > 63: MOV TEMP[1].w, IMM[1].yyyy > 64: DP4 TEMP[0].x, CONST[1][46], TEMP[1] > 65: DP4 TEMP[2].x, CONST[1][47], TEMP[1] > 66: MOV TEMP[0].y, TEMP[2].xxxx > 67: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 68: MOV TEMP[0].z, TEMP[2].xxxx > 69: DP4 TEMP[1].x, CONST[1][49], TEMP[1] > 70: MOV TEMP[0].w, TEMP[1].xxxx > 71: MOV OUT[0], TEMP[0] > 72: END >radeonsi: Compiling shader 81 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 188) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 736) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 740) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 744) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 748) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 848) > %40 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 852) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 856) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 860) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %50 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %13) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = fmul float %19, %20 > %58 = fmul float %19, %21 > %59 = fadd float %54, 0.000000e+00 > %60 = fadd float %55, 0.000000e+00 > %61 = fadd float %56, 1.000000e+00 > %62 = fmul float %57, %59 > %63 = fmul float %58, %60 > %64 = fmul float %19, %61 > %65 = fmul float %22, %20 > %66 = fmul float %22, %21 > %67 = fmul float %65, %54 > %68 = fmul float %66, %55 > %69 = fmul float %22, %56 > %70 = fsub float -0.000000e+00, %69 > %71 = fcmp olt float %56, -5.000000e-01 > %. = select i1 %71, float %67, float %62 > %temp16.0 = select i1 %71, float %68, float %63 > %.39 = select i1 %71, float %70, float %64 > %72 = fcmp ogt float %56, 5.000000e-01 > %73 = fmul float %54, %16 > %74 = fmul float %55, %17 > %75 = fmul float %56, %18 > %temp16.1 = select i1 %72, float %73, float %. 
> %.temp16.0 = select i1 %72, float %74, float %temp16.0 > %temp8.1 = select i1 %72, float %75, float %.39 > %76 = fmul float %39, %temp16.1 > %77 = fmul float %40, %.temp16.0 > %78 = fadd float %76, %77 > %79 = fmul float %41, %temp8.1 > %80 = fadd float %78, %79 > %81 = fadd float %80, %42 > %82 = fmul float %43, %temp16.1 > %83 = fmul float %44, %.temp16.0 > %84 = fadd float %82, %83 > %85 = fmul float %45, %temp8.1 > %86 = fadd float %84, %85 > %87 = fadd float %86, %46 > %88 = fmul float %47, %temp16.1 > %89 = fmul float %48, %.temp16.0 > %90 = fadd float %88, %89 > %91 = fmul float %49, %temp8.1 > %92 = fadd float %90, %91 > %93 = fadd float %92, %50 > %94 = fmul float %23, %81 > %95 = fmul float %24, %87 > %96 = fadd float %94, %95 > %97 = fmul float %25, %93 > %98 = fadd float %96, %97 > %99 = fadd float %98, %26 > %100 = fmul float %27, %81 > %101 = fmul float %28, %87 > %102 = fadd float %100, %101 > %103 = fmul float %29, %93 > %104 = fadd float %102, %103 > %105 = fadd float %104, %30 > %106 = fmul float %31, %81 > %107 = fmul float %32, %87 > %108 = fadd float %106, %107 > %109 = fmul float %33, %93 > %110 = fadd float %108, %109 > %111 = fadd float %110, %34 > %112 = fmul float %35, %81 > %113 = fmul float %36, %87 > %114 = fadd float %112, %113 > %115 = fmul float %37, %93 > %116 = fadd float %114, %115 > %117 = fadd float %116, %38 > %118 = bitcast i32 %11 to float > %119 = insertvalue <{ float, float, float }> undef, float %118, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %99, float %105, float %111, float %117) > ret <{ float, float, float }> %119 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL CONST[1][0..18] >DCL TEMP[0..18], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 256, 64, 80} >IMM[2] UINT32 {96, 288, 240, 0} >IMM[3] FLT32 { -1.0000, 4096.0000, 0.0040, 0.3000} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 { 0.1250, 0.5098, 0.1500, -0.5000} >IMM[6] FLT32 { 0.2500, 1.0000, 0.5000, 0.3330} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy 
> 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: ADD TEMP[3].x, -CONST[1][16].wwww, IMM[0].zzzz > 15: FMA TEMP[2].x, CONST[1][16].wwww, TEMP[2].xxxx, TEMP[3].xxxx > 16: MOV TEMP[3].xy, TEMP[0].xyyy > 17: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D > 18: MOV TEMP[4].z, TEMP[3].xxxx > 19: MUL TEMP[4].xy, TEMP[1].xyyy, TEMP[3].xxxx > 20: MOV TEMP[4].w, IMM[0].zzzz > 21: DP4 TEMP[1].x, CONST[1][4], TEMP[4] > 22: DP4 TEMP[3].x, CONST[1][5], TEMP[4] > 23: MOV TEMP[1].y, TEMP[3].xxxx > 24: DP4 TEMP[3].x, CONST[1][6], TEMP[4] > 25: MOV TEMP[4].y, TEMP[3].xxxx > 26: MOV TEMP[4].x, IMM[0].xxxx > 27: MOV TEMP[3].xy, TEMP[4].xyyy > 28: MOV TEMP[3].w, IMM[0].xxxx > 29: TXL TEMP[3], TEMP[3], SAMP[3], 2D > 30: MOV TEMP[5].xy, TEMP[1].xyyy > 31: MOV TEMP[5].w, IMM[0].xxxx > 32: TXL TEMP[5], TEMP[5], SAMP[2], 2D > 33: MUL TEMP[4], TEMP[3], TEMP[5] > 34: MUL TEMP[3].xyz, TEMP[4].xyzz, CONST[1][16].xyzz > 35: MOV TEMP[5].xy, TEMP[0].xyyy > 36: TEX TEMP[5], TEMP[5], SAMP[4], 2D > 37: MOV TEMP[1].xyz, TEMP[5] > 38: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[5].xyzz > 39: DP3 TEMP[7].x, IN[2].xyzz, IN[2].xyzz > 40: RSQ TEMP[7].x, TEMP[7].xxxx > 41: MOV TEMP[8].xy, TEMP[0].xyyy > 42: TEX TEMP[8], TEMP[8], SAMP[5], 2D > 43: FMA TEMP[9].xyz, TEMP[8].xyzz, IMM[0].wwww, IMM[3].xxxx > 44: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz > 45: RSQ TEMP[10].x, TEMP[10].xxxx > 46: MUL TEMP[9].xyz, TEMP[10].xxxx, TEMP[9].xyzz > 47: MOV TEMP[10].xy, TEMP[0].xyyy > 48: TEX TEMP[10], TEMP[10], SAMP[6], 2D > 49: MUL TEMP[4], TEMP[4], CONST[1][18] > 50: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx > 51: FMA TEMP[12].x, TEMP[11].xxxx, IMM[3].yyyy, IMM[3].zzzz > 52: MOV TEMP[13].x, TEMP[12].xxxx > 53: MOV TEMP[14].xy, TEMP[0].xyyy > 54: MOV TEMP[14].w, IMM[0].xxxx > 55: TXL TEMP[14].xy, TEMP[14], SAMP[7], 2D > 56: DP3 TEMP[15].x, TEMP[9].xyzz, -CONST[1][15].xyzz > 57: FSGE TEMP[16].x, TEMP[15].xxxx, IMM[0].xxxx > 58: AND TEMP[16].x, TEMP[16].xxxx, IMM[4].xxxx > 59: INEG TEMP[16].x, TEMP[16].xxxx > 60: FMA TEMP[17].xyz, IN[2].xyzz, TEMP[7].xxxx, -CONST[1][15].xyzz > 61: DP3 TEMP[11].x, TEMP[17].xyzz, TEMP[17].xyzz > 62: RSQ TEMP[11].x, TEMP[11].xxxx > 63: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[17].xyzz > 64: DP3 TEMP[17].x, TEMP[9].xyzz, TEMP[11].xyzz > 65: MOV_SAT TEMP[17].x, TEMP[17].xxxx > 66: MOV TEMP[13].y, TEMP[17].xxxx > 67: FMA TEMP[7].xyz, IN[2].xyzz, TEMP[7].xxxx, CONST[1][15].xyzz > 68: DP3 TEMP[17].x, TEMP[7].xyzz, TEMP[7].xyzz > 69: RSQ TEMP[17].x, TEMP[17].xxxx > 70: MUL TEMP[7].xyz, TEMP[17].xxxx, TEMP[7].xyzz > 71: DP3 TEMP[17].x, TEMP[9].xyzz, TEMP[7].xyzz > 72: MOV_SAT TEMP[17].x, TEMP[17].xxxx > 73: MOV TEMP[9].y, TEMP[17].xxxx > 74: MUL TEMP[9].x, TEMP[12].xxxx, IMM[3].wwww > 75: MUL TEMP[12].xyz, TEMP[5].xyzz, TEMP[10].yyyy > 76: ADD TEMP[12].xyz, TEMP[12].xyzz, TEMP[12].xyzz > 77: USNE TEMP[17].x, TEMP[16].xxxx, IMM[1].xxxx > 78: UIF TEMP[17].xxxx :0 > 79: MOV TEMP[17].x, TEMP[10].yyyy > 80: ELSE :0 > 81: MOV TEMP[17].x, TEMP[12].xxxx > 82: ENDIF > 83: MOV TEMP[17].x, TEMP[17].xxxx > 84: USNE TEMP[18].x, TEMP[16].xxxx, IMM[1].xxxx > 85: UIF TEMP[18].xxxx :0 > 86: MOV TEMP[18].x, TEMP[10].yyyy > 87: ELSE :0 > 88: MOV TEMP[18].x, TEMP[12].yyyy > 89: ENDIF > 90: MOV TEMP[17].y, TEMP[18].xxxx > 91: USNE TEMP[18].x, TEMP[16].xxxx, IMM[1].xxxx > 92: UIF TEMP[18].xxxx :0 > 93: MOV TEMP[18].x, TEMP[10].yyyy > 94: ELSE :0 > 95: MOV TEMP[18].x, TEMP[12].zzzz > 96: ENDIF > 97: MOV TEMP[17].z, TEMP[18].xxxx > 98: USNE TEMP[12].x, TEMP[16].xxxx, 
IMM[1].xxxx > 99: UIF TEMP[12].xxxx :0 >100: MOV TEMP[12].x, TEMP[11].xxxx >101: ELSE :0 >102: MOV TEMP[12].x, TEMP[7].xxxx >103: ENDIF >104: MOV TEMP[12].x, TEMP[12].xxxx >105: USNE TEMP[18].x, TEMP[16].xxxx, IMM[1].xxxx >106: UIF TEMP[18].xxxx :0 >107: MOV TEMP[18].x, TEMP[11].yyyy >108: ELSE :0 >109: MOV TEMP[18].x, TEMP[7].yyyy >110: ENDIF >111: MOV TEMP[12].y, TEMP[18].xxxx >112: USNE TEMP[18].x, TEMP[16].xxxx, IMM[1].xxxx >113: UIF TEMP[18].xxxx :0 >114: MOV TEMP[11].x, TEMP[11].zzzz >115: ELSE :0 >116: MOV TEMP[11].x, TEMP[7].zzzz >117: ENDIF >118: MOV TEMP[12].z, TEMP[11].xxxx >119: USNE TEMP[7].x, TEMP[16].xxxx, IMM[1].xxxx >120: UIF TEMP[7].xxxx :0 >121: MOV TEMP[7].x, TEMP[13].xxxx >122: ELSE :0 >123: MOV TEMP[7].x, TEMP[9].xxxx >124: ENDIF >125: MOV TEMP[7].x, TEMP[7].xxxx >126: USNE TEMP[11].x, TEMP[16].xxxx, IMM[1].xxxx >127: UIF TEMP[11].xxxx :0 >128: MOV TEMP[11].x, TEMP[13].yyyy >129: ELSE :0 >130: MOV TEMP[11].x, TEMP[9].yyyy >131: ENDIF >132: MOV TEMP[7].y, TEMP[11].xxxx >133: ADD TEMP[11].x, TEMP[7].xxxx, IMM[0].wwww >134: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].xxxx >135: LG2 TEMP[13].x, TEMP[7].yyyy >136: MUL TEMP[9].x, TEMP[13].xxxx, TEMP[7].xxxx >137: EX2 TEMP[7].x, TEMP[9].xxxx >138: MUL TEMP[7].x, TEMP[11].xxxx, TEMP[7].xxxx >139: ADD TEMP[9].x, -TEMP[10].zzzz, IMM[0].zzzz >140: DP3 TEMP[11].x, -CONST[1][15].xyzz, TEMP[12].xyzz >141: MOV_SAT TEMP[11].x, TEMP[11].xxxx >142: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].zzzz >143: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx >144: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx >145: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx >146: FMA TEMP[11].x, TEMP[9].xxxx, TEMP[11].xxxx, TEMP[10].zzzz >147: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[11].xxxx >148: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[14].xxxx >149: MUL TEMP[4].xyz, TEMP[17].xyzz, TEMP[4].xyzz >150: ADD TEMP[0].x, TEMP[5].wwww, TEMP[15].xxxx >151: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx >152: FSNE TEMP[11].x, TEMP[5].wwww, IMM[0].xxxx >153: UIF TEMP[11].xxxx :0 >154: RCP TEMP[11].x, TEMP[5].wwww >155: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx >156: ELSE :0 >157: SSG TEMP[12].x, TEMP[0].xxxx >158: MUL TEMP[11].x, IMM[0].yyyy, TEMP[12].xxxx >159: ENDIF >160: MOV_SAT TEMP[11].x, TEMP[11].xxxx >161: FMA TEMP[7].xyz, TEMP[4].xyzz, TEMP[7].xxxx, TEMP[6].xyzz >162: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[7].xyzz >163: ADD TEMP[0].x, TEMP[5].wwww, -TEMP[15].xxxx >164: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx >165: FSNE TEMP[11].x, TEMP[5].wwww, IMM[0].xxxx >166: UIF TEMP[11].xxxx :0 >167: RCP TEMP[11].x, TEMP[5].wwww >168: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx >169: ELSE :0 >170: SSG TEMP[12].x, TEMP[0].xxxx >171: MUL TEMP[11].x, IMM[0].yyyy, TEMP[12].xxxx >172: ENDIF >173: MOV_SAT TEMP[11].x, TEMP[11].xxxx >174: MUL TEMP[4].xyz, TEMP[11].xxxx, TEMP[7].xyzz >175: AND TEMP[7].xyz, TEMP[16].xxxx, TEMP[9].xyzz >176: MOV TEMP[9].xyz, TEMP[7].xyzx >177: USNE TEMP[11].x, TEMP[16].xxxx, IMM[1].xxxx >178: UIF TEMP[11].xxxx :0 >179: MOV TEMP[11].x, IMM[1].xxxx >180: ELSE :0 >181: MOV TEMP[11].x, TEMP[4].xxxx >182: ENDIF >183: MOV TEMP[11].x, TEMP[11].xxxx >184: USNE TEMP[12].x, TEMP[16].xxxx, IMM[1].xxxx >185: UIF TEMP[12].xxxx :0 >186: MOV TEMP[12].x, IMM[1].xxxx >187: ELSE :0 >188: MOV TEMP[12].x, TEMP[4].yyyy >189: ENDIF >190: MOV TEMP[11].y, TEMP[12].xxxx >191: USNE TEMP[12].x, TEMP[16].xxxx, IMM[1].xxxx >192: UIF TEMP[12].xxxx :0 >193: MOV TEMP[12].x, IMM[1].xxxx >194: ELSE :0 >195: MOV TEMP[12].x, TEMP[4].zzzz >196: ENDIF >197: MOV TEMP[11].z, TEMP[12].xxxx >198: FSLT TEMP[12].x, 
IMM[0].xxxx, TEMP[10].wwww >199: AND TEMP[12].x, TEMP[12].xxxx, IMM[4].xxxx >200: INEG TEMP[12].x, TEMP[12].xxxx >201: MOV TEMP[0].x, TEMP[12].xxxx >202: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >203: UIF TEMP[12].xxxx :0 >204: FSLT TEMP[8].x, TEMP[8].wwww, IMM[5].yyyy >205: AND TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx >206: INEG TEMP[8].x, TEMP[8].xxxx >207: MOV TEMP[0].x, TEMP[8].xxxx >208: ADD TEMP[6].xyz, TEMP[6].xyzz, TEMP[6].xyzz >209: MAX TEMP[12].x, TEMP[5].zzzz, TEMP[5].yyyy >210: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[5].xxxx >211: FSEQ TEMP[13].xyz, TEMP[12].xxxx, IMM[0].xxxx >212: SSG TEMP[16].xyz, TEMP[5].xyzz >213: MUL TEMP[16].xyz, IMM[0].yyyy, TEMP[16].xyzz >214: RCP TEMP[12].xyz, TEMP[12].xxxx >215: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[12].xyzz >216: UCMP TEMP[5].xyz, TEMP[13].xyzz, TEMP[16].xyzz, TEMP[5].xyzz >217: MOV_SAT TEMP[5].xyz, TEMP[5].xyzz >218: MUL TEMP[1].xyz, TEMP[5].xyzz, TEMP[5].xyzz >219: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz >220: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[1].xyzz >221: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[5].zzzz >222: USNE TEMP[5].x, TEMP[8].xxxx, IMM[1].xxxx >223: UIF TEMP[5].xxxx :0 >224: MOV TEMP[5].x, TEMP[6].xxxx >225: ELSE :0 >226: MOV TEMP[5].x, TEMP[3].xxxx >227: ENDIF >228: MOV TEMP[5].x, TEMP[5].xxxx >229: USNE TEMP[12].x, TEMP[8].xxxx, IMM[1].xxxx >230: UIF TEMP[12].xxxx :0 >231: MOV TEMP[12].x, TEMP[6].yyyy >232: ELSE :0 >233: MOV TEMP[12].x, TEMP[3].yyyy >234: ENDIF >235: MOV TEMP[5].y, TEMP[12].xxxx >236: USNE TEMP[8].x, TEMP[8].xxxx, IMM[1].xxxx >237: UIF TEMP[8].xxxx :0 >238: MOV TEMP[6].x, TEMP[6].zzzz >239: ELSE :0 >240: MOV TEMP[6].x, TEMP[3].zzzz >241: ENDIF >242: MOV TEMP[5].z, TEMP[6].xxxx >243: ADD TEMP[3].x, TEMP[10].wwww, IMM[5].wwww >244: MOV_SAT TEMP[3].x, TEMP[3].xxxx >245: MUL TEMP[1].xyz, TEMP[3].xxxx, TEMP[5].xyzz >246: ADD TEMP[3].xy, -TEMP[15].xxxx, IMM[6].xyyy >247: MOV_SAT TEMP[3].xy, TEMP[3].xyyy >248: FMA TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx, TEMP[7].xyzz >249: MIN TEMP[6].x, TEMP[10].wwww, IMM[6].zzzz >250: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[5].xyzz >251: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[5].xyzz >252: ADD TEMP[5].x, TEMP[15].xxxx, IMM[6].xxxx >253: MOV_SAT TEMP[0].x, TEMP[5].xxxx >254: FMA TEMP[9].xyz, TEMP[3].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >255: ENDIF >256: MUL TEMP[0].xyz, TEMP[11].xyzz, TEMP[4].wwww >257: FMA TEMP[0].xyz, TEMP[2].xxxx, TEMP[9].xyzz, TEMP[0].xyzz >258: ADD TEMP[1].x, TEMP[14].yyyy, IMM[6].wwww >259: MOV_SAT TEMP[1].x, TEMP[1].xxxx >260: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >261: MOV TEMP[0].w, IMM[0].zzzz >262: MOV OUT[0], TEMP[0] >263: END >radeonsi: Compiling shader 82 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 
68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 3 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 > %59 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 7 > %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 > %62 = extractelement <8 x i32> %58, i32 7 > %63 = extractelement <4 x i32> %61, i32 0 > %64 = and i32 %63, %62 > %65 = insertelement <4 x i32> %61, i32 %64, i32 0 > %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 > %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 11 > %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 > %71 = extractelement <8 x i32> %67, i32 7 > %72 = extractelement <4 x i32> %70, i32 0 > %73 = and i32 %72, %71 > %74 = insertelement <4 x i32> %70, i32 %73, i32 0 > %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 > %77 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %78 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %77, i64 0, i64 15 > %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 > %80 = extractelement <8 x i32> %76, i32 7 > %81 = extractelement <4 x i32> %79, i32 0 > %82 = and i32 %81, %80 > %83 = 
insertelement <4 x i32> %79, i32 %82, i32 0 > %84 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, align 32, !tbaa !0 > %86 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %87 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %86, i64 0, i64 19 > %88 = load <4 x i32>, <4 x i32> addrspace(2)* %87, align 16, !tbaa !0 > %89 = extractelement <8 x i32> %85, i32 7 > %90 = extractelement <4 x i32> %88, i32 0 > %91 = and i32 %90, %89 > %92 = insertelement <4 x i32> %88, i32 %91, i32 0 > %93 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %94 = load <8 x i32>, <8 x i32> addrspace(2)* %93, align 32, !tbaa !0 > %95 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %96 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %95, i64 0, i64 23 > %97 = load <4 x i32>, <4 x i32> addrspace(2)* %96, align 16, !tbaa !0 > %98 = extractelement <8 x i32> %94, i32 7 > %99 = extractelement <4 x i32> %97, i32 0 > %100 = and i32 %99, %98 > %101 = insertelement <4 x i32> %97, i32 %100, i32 0 > %102 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %103 = load <8 x i32>, <8 x i32> addrspace(2)* %102, align 32, !tbaa !0 > %104 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %105 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %104, i64 0, i64 27 > %106 = load <4 x i32>, <4 x i32> addrspace(2)* %105, align 16, !tbaa !0 > %107 = extractelement <8 x i32> %103, i32 7 > %108 = extractelement <4 x i32> %106, i32 0 > %109 = and i32 %108, %107 > %110 = insertelement <4 x i32> %106, i32 %109, i32 0 > %111 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %112 = load <8 x i32>, <8 x i32> addrspace(2)* %111, align 32, !tbaa !0 > %113 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %114 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %113, i64 0, i64 31 > %115 = load <4 x i32>, <4 x i32> addrspace(2)* %114, align 16, !tbaa !0 > %116 = extractelement <8 x i32> %112, i32 7 > %117 = extractelement <4 x i32> %115, i32 0 > %118 = and i32 %117, %116 > %119 = insertelement <4 x i32> %115, i32 %118, i32 0 > %120 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %121 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %122 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %123 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %124 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %125 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %126 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %127 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %128 = fcmp oeq float %122, 0.000000e+00 > %129 = fcmp oeq float %122, 0.000000e+00 > %130 = fcmp ogt float %120, 0.000000e+00 > %131 = select i1 %130, float 1.000000e+00, float %120 > %132 = fcmp oge float %131, 0.000000e+00 > %133 = fcmp ogt float %121, 0.000000e+00 > %134 = select i1 %133, float 1.000000e+00, float %121 > %135 = fcmp oge float %134, 0.000000e+00 > %.op = fmul float %131, 0x4600000000000000 > %136 = select i1 %132, float %.op, float 0xC600000000000000 > %.op131 = fmul float %134, 0x4600000000000000 > %137 = select i1 %135, float %.op131, float 0xC600000000000000 > %138 
= fdiv float 1.000000e+00, %122 > %139 = fmul float %120, %138 > %140 = fmul float %121, %138 > %141 = select i1 %128, float %136, float %139 > %142 = select i1 %129, float %137, float %140 > %143 = fcmp oeq float %122, 0.000000e+00 > %144 = fcmp oeq float %122, 0.000000e+00 > %145 = fcmp ogt float %123, 0.000000e+00 > %146 = select i1 %145, float 1.000000e+00, float %123 > %147 = fcmp oge float %146, 0.000000e+00 > %148 = fcmp ogt float %124, 0.000000e+00 > %149 = select i1 %148, float 1.000000e+00, float %124 > %150 = fcmp oge float %149, 0.000000e+00 > %.op132 = fmul float %146, 0x4600000000000000 > %151 = select i1 %147, float %.op132, float 0xC600000000000000 > %.op133 = fmul float %149, 0x4600000000000000 > %152 = select i1 %150, float %.op133, float 0xC600000000000000 > %153 = fdiv float 1.000000e+00, %122 > %154 = fmul float %123, %153 > %155 = fmul float %124, %153 > %156 = select i1 %143, float %151, float %154 > %157 = select i1 %144, float %152, float %155 > %158 = bitcast float %141 to i32 > %159 = bitcast float %142 to i32 > %160 = insertelement <2 x i32> undef, i32 %158, i32 0 > %161 = insertelement <2 x i32> %160, i32 %159, i32 1 > %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %163 = extractelement <4 x float> %162, i32 0 > %164 = fsub float 1.000000e+00, %43 > %165 = call float @llvm.fma.f32(float %43, float %163, float %164) > %166 = bitcast float %141 to i32 > %167 = bitcast float %142 to i32 > %168 = insertelement <2 x i32> undef, i32 %166, i32 0 > %169 = insertelement <2 x i32> %168, i32 %167, i32 1 > %170 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %169, <8 x i32> %58, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %171 = extractelement <4 x float> %170, i32 0 > %172 = fmul float %156, %171 > %173 = fmul float %157, %171 > %174 = fmul float %25, %172 > %175 = fmul float %26, %173 > %176 = fadd float %174, %175 > %177 = fmul float %27, %171 > %178 = fadd float %176, %177 > %179 = fadd float %178, %28 > %180 = fmul float %29, %172 > %181 = fmul float %30, %173 > %182 = fadd float %180, %181 > %183 = fmul float %31, %171 > %184 = fadd float %182, %183 > %185 = fadd float %184, %32 > %186 = fmul float %33, %172 > %187 = fmul float %34, %173 > %188 = fadd float %186, %187 > %189 = fmul float %35, %171 > %190 = fadd float %188, %189 > %191 = fadd float %190, %36 > %192 = bitcast float %191 to i32 > %193 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %192, i32 1 > %194 = insertelement <4 x i32> %193, i32 0, i32 2 > %195 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %194, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %196 = extractelement <4 x float> %195, i32 0 > %197 = extractelement <4 x float> %195, i32 1 > %198 = extractelement <4 x float> %195, i32 2 > %199 = extractelement <4 x float> %195, i32 3 > %200 = bitcast float %179 to i32 > %201 = bitcast float %185 to i32 > %202 = insertelement <4 x i32> undef, i32 %200, i32 0 > %203 = insertelement <4 x i32> %202, i32 %201, i32 1 > %204 = insertelement <4 x i32> %203, i32 0, i32 2 > %205 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %204, <8 x i32> %67, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %206 = extractelement <4 x float> %205, i32 0 > %207 = extractelement <4 x float> %205, i32 1 > %208 = extractelement <4 x float> %205, i32 2 > %209 = 
extractelement <4 x float> %205, i32 3 > %210 = fmul float %196, %206 > %211 = fmul float %197, %207 > %212 = fmul float %198, %208 > %213 = fmul float %199, %209 > %214 = fmul float %210, %40 > %215 = fmul float %211, %41 > %216 = fmul float %212, %42 > %217 = bitcast float %141 to i32 > %218 = bitcast float %142 to i32 > %219 = insertelement <2 x i32> undef, i32 %217, i32 0 > %220 = insertelement <2 x i32> %219, i32 %218, i32 1 > %221 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %220, <8 x i32> %85, <4 x i32> %92, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %222 = extractelement <4 x float> %221, i32 0 > %223 = extractelement <4 x float> %221, i32 1 > %224 = extractelement <4 x float> %221, i32 2 > %225 = extractelement <4 x float> %221, i32 3 > %226 = fmul float %214, %222 > %227 = fmul float %215, %223 > %228 = fmul float %216, %224 > %229 = fmul float %125, %125 > %230 = fmul float %126, %126 > %231 = fadd float %230, %229 > %232 = fmul float %127, %127 > %233 = fadd float %231, %232 > %234 = call float @llvm.AMDGPU.rsq.clamped.f32(float %233) > %235 = bitcast float %141 to i32 > %236 = bitcast float %142 to i32 > %237 = insertelement <2 x i32> undef, i32 %235, i32 0 > %238 = insertelement <2 x i32> %237, i32 %236, i32 1 > %239 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %238, <8 x i32> %94, <4 x i32> %101, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %240 = extractelement <4 x float> %239, i32 0 > %241 = extractelement <4 x float> %239, i32 1 > %242 = extractelement <4 x float> %239, i32 2 > %243 = extractelement <4 x float> %239, i32 3 > %244 = call float @llvm.fma.f32(float %240, float 2.000000e+00, float -1.000000e+00) > %245 = call float @llvm.fma.f32(float %241, float 2.000000e+00, float -1.000000e+00) > %246 = call float @llvm.fma.f32(float %242, float 2.000000e+00, float -1.000000e+00) > %247 = fmul float %244, %244 > %248 = fmul float %245, %245 > %249 = fadd float %248, %247 > %250 = fmul float %246, %246 > %251 = fadd float %249, %250 > %252 = call float @llvm.AMDGPU.rsq.clamped.f32(float %251) > %253 = fmul float %252, %244 > %254 = fmul float %252, %245 > %255 = fmul float %252, %246 > %256 = bitcast float %141 to i32 > %257 = bitcast float %142 to i32 > %258 = insertelement <2 x i32> undef, i32 %256, i32 0 > %259 = insertelement <2 x i32> %258, i32 %257, i32 1 > %260 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %259, <8 x i32> %103, <4 x i32> %110, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %261 = extractelement <4 x float> %260, i32 0 > %262 = extractelement <4 x float> %260, i32 1 > %263 = extractelement <4 x float> %260, i32 2 > %264 = extractelement <4 x float> %260, i32 3 > %265 = fmul float %210, %44 > %266 = fmul float %211, %45 > %267 = fmul float %212, %46 > %268 = fmul float %213, %47 > %269 = fmul float %261, %261 > %270 = call float @llvm.fma.f32(float %269, float 4.096000e+03, float 0x3F70624DE0000000) > %271 = bitcast float %141 to i32 > %272 = bitcast float %142 to i32 > %273 = insertelement <4 x i32> undef, i32 %271, i32 0 > %274 = insertelement <4 x i32> %273, i32 %272, i32 1 > %275 = insertelement <4 x i32> %274, i32 0, i32 2 > %276 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %275, <8 x i32> %112, <4 x i32> %119, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %277 = extractelement <4 x float> %276, i32 0 > %278 = extractelement <4 x float> %276, i32 1 > %279 = fmul float %37, %253 > %280 = fsub float -0.000000e+00, %279 > %281 = fmul float %38, 
%254 > %282 = fsub float %280, %281 > %283 = fmul float %39, %255 > %284 = fsub float %282, %283 > %285 = fcmp oge float %284, 0.000000e+00 > %286 = fsub float -0.000000e+00, %37 > %287 = call float @llvm.fma.f32(float %125, float %234, float %286) > %288 = fsub float -0.000000e+00, %38 > %289 = call float @llvm.fma.f32(float %126, float %234, float %288) > %290 = fsub float -0.000000e+00, %39 > %291 = call float @llvm.fma.f32(float %127, float %234, float %290) > %292 = fmul float %287, %287 > %293 = fmul float %289, %289 > %294 = fadd float %293, %292 > %295 = fmul float %291, %291 > %296 = fadd float %294, %295 > %297 = call float @llvm.AMDGPU.rsq.clamped.f32(float %296) > %298 = fmul float %297, %287 > %299 = fmul float %297, %289 > %300 = fmul float %297, %291 > %301 = fmul float %253, %298 > %302 = fmul float %254, %299 > %303 = fadd float %302, %301 > %304 = fmul float %255, %300 > %305 = fadd float %303, %304 > %306 = call float @llvm.AMDGPU.clamp.(float %305, float 0.000000e+00, float 1.000000e+00) > %307 = call float @llvm.fma.f32(float %125, float %234, float %37) > %308 = call float @llvm.fma.f32(float %126, float %234, float %38) > %309 = call float @llvm.fma.f32(float %127, float %234, float %39) > %310 = fmul float %307, %307 > %311 = fmul float %308, %308 > %312 = fadd float %311, %310 > %313 = fmul float %309, %309 > %314 = fadd float %312, %313 > %315 = call float @llvm.AMDGPU.rsq.clamped.f32(float %314) > %316 = fmul float %315, %307 > %317 = fmul float %315, %308 > %318 = fmul float %315, %309 > %319 = fmul float %253, %316 > %320 = fmul float %254, %317 > %321 = fadd float %320, %319 > %322 = fmul float %255, %318 > %323 = fadd float %321, %322 > %324 = call float @llvm.AMDGPU.clamp.(float %323, float 0.000000e+00, float 1.000000e+00) > %325 = fmul float %270, 0x3FD3333340000000 > %326 = fmul float %222, %262 > %327 = fmul float %223, %262 > %328 = fmul float %224, %262 > %329 = fadd float %326, %326 > %330 = fadd float %327, %327 > %331 = fadd float %328, %328 > %. 
= select i1 %285, float %262, float %329 > %temp72.0 = select i1 %285, float %262, float %330 > %.124 = select i1 %285, float %262, float %331 > %temp48.0 = select i1 %285, float %298, float %316 > %.125 = select i1 %285, float %299, float %317 > %temp44.0 = select i1 %285, float %300, float %318 > %.126 = select i1 %285, float %270, float %325 > %temp44.1 = select i1 %285, float %306, float %324 > %332 = fadd float %.126, 2.000000e+00 > %333 = fmul float %332, 1.250000e-01 > %334 = call float @llvm.log2.f32(float %temp44.1) > %335 = fmul float %334, %.126 > %336 = call float @llvm.exp2.f32(float %335) > %337 = fmul float %333, %336 > %338 = fsub float 1.000000e+00, %263 > %339 = fmul float %37, %temp48.0 > %340 = fsub float -0.000000e+00, %339 > %341 = fmul float %38, %.125 > %342 = fsub float %340, %341 > %343 = fmul float %39, %temp44.0 > %344 = fsub float %342, %343 > %345 = call float @llvm.AMDGPU.clamp.(float %344, float 0.000000e+00, float 1.000000e+00) > %346 = fsub float 1.000000e+00, %345 > %347 = fmul float %346, %346 > %348 = fmul float %347, %347 > %349 = fmul float %348, %346 > %350 = call float @llvm.fma.f32(float %338, float %349, float %263) > %351 = fmul float %337, %350 > %352 = fmul float %265, %277 > %353 = fmul float %266, %277 > %354 = fmul float %267, %277 > %355 = fmul float %., %352 > %356 = fmul float %temp72.0, %353 > %357 = fmul float %.124, %354 > %358 = fadd float %225, %284 > %359 = fadd float %358, -1.000000e+00 > %360 = fcmp une float %225, 0.000000e+00 > br i1 %360, label %IF98, label %ELSE99 > >IF98: ; preds = %main_body > %361 = fdiv float 1.000000e+00, %225 > %362 = fmul float %359, %361 > br label %ENDIF97 > >ELSE99: ; preds = %main_body > %363 = fcmp ogt float %359, 0.000000e+00 > %364 = select i1 %363, float 1.000000e+00, float %359 > %365 = fcmp oge float %364, 0.000000e+00 > %.op134 = fmul float %364, 0x4600000000000000 > %366 = select i1 %365, float %.op134, float 0xC600000000000000 > br label %ENDIF97 > >ENDIF97: ; preds = %ELSE99, %IF98 > %temp44.2 = phi float [ %362, %IF98 ], [ %366, %ELSE99 ] > %367 = call float @llvm.AMDGPU.clamp.(float %temp44.2, float 0.000000e+00, float 1.000000e+00) > %368 = call float @llvm.fma.f32(float %355, float %351, float %226) > %369 = call float @llvm.fma.f32(float %356, float %351, float %227) > %370 = call float @llvm.fma.f32(float %357, float %351, float %228) > %371 = fmul float %367, %368 > %372 = fmul float %367, %369 > %373 = fmul float %367, %370 > %374 = fsub float %225, %284 > %375 = fadd float %374, -1.000000e+00 > %376 = fcmp une float %225, 0.000000e+00 > br i1 %376, label %IF101, label %ELSE102 > >IF101: ; preds = %ENDIF97 > %377 = fdiv float 1.000000e+00, %225 > %378 = fmul float %375, %377 > br label %ENDIF100 > >ELSE102: ; preds = %ENDIF97 > %379 = fcmp ogt float %375, 0.000000e+00 > %380 = select i1 %379, float 1.000000e+00, float %375 > %381 = fcmp oge float %380, 0.000000e+00 > %.op135 = fmul float %380, 0x4600000000000000 > %382 = select i1 %381, float %.op135, float 0xC600000000000000 > br label %ENDIF100 > >ENDIF100: ; preds = %ELSE102, %IF101 > %temp44.3 = phi float [ %378, %IF101 ], [ %382, %ELSE102 ] > %383 = call float @llvm.AMDGPU.clamp.(float %temp44.3, float 0.000000e+00, float 1.000000e+00) > %384 = fmul float %383, %368 > %385 = fmul float %383, %369 > %386 = fmul float %383, %370 > %387 = select i1 %285, float %371, float 0.000000e+00 > %388 = select i1 %285, float %372, float 0.000000e+00 > %389 = select i1 %285, float %373, float 0.000000e+00 > %.127 = select i1 %285, float 
0.000000e+00, float %384 > %temp48.1 = select i1 %285, float 0.000000e+00, float %385 > %.128 = select i1 %285, float 0.000000e+00, float %386 > %390 = fcmp ogt float %264, 0.000000e+00 > br i1 %390, label %IF113, label %ENDIF112 > >IF113: ; preds = %ENDIF100 > %391 = fcmp olt float %243, 0x3FE0505060000000 > %392 = fadd float %226, %226 > %393 = fadd float %227, %227 > %394 = fadd float %228, %228 > %395 = call float @llvm.maxnum.f32(float %224, float %223) > %396 = call float @llvm.maxnum.f32(float %395, float %222) > %397 = fcmp oeq float %396, 0.000000e+00 > %398 = fcmp oeq float %396, 0.000000e+00 > %399 = fcmp oeq float %396, 0.000000e+00 > %400 = fcmp ogt float %222, 0.000000e+00 > %401 = select i1 %400, float 1.000000e+00, float %222 > %402 = fcmp oge float %401, 0.000000e+00 > %403 = fcmp ogt float %223, 0.000000e+00 > %404 = select i1 %403, float 1.000000e+00, float %223 > %405 = fcmp oge float %404, 0.000000e+00 > %406 = fcmp ogt float %224, 0.000000e+00 > %407 = select i1 %406, float 1.000000e+00, float %224 > %408 = fcmp oge float %407, 0.000000e+00 > %.op136 = fmul float %401, 0x4600000000000000 > %409 = select i1 %402, float %.op136, float 0xC600000000000000 > %.op137 = fmul float %404, 0x4600000000000000 > %410 = select i1 %405, float %.op137, float 0xC600000000000000 > %.op138 = fmul float %407, 0x4600000000000000 > %411 = select i1 %408, float %.op138, float 0xC600000000000000 > %412 = fdiv float 1.000000e+00, %396 > %413 = fmul float %222, %412 > %414 = fmul float %223, %412 > %415 = fmul float %224, %412 > %416 = select i1 %397, float %409, float %413 > %417 = select i1 %398, float %410, float %414 > %418 = select i1 %399, float %411, float %415 > %419 = call float @llvm.AMDGPU.clamp.(float %416, float 0.000000e+00, float 1.000000e+00) > %420 = call float @llvm.AMDGPU.clamp.(float %417, float 0.000000e+00, float 1.000000e+00) > %421 = call float @llvm.AMDGPU.clamp.(float %418, float 0.000000e+00, float 1.000000e+00) > %422 = fmul float %419, %419 > %423 = fmul float %420, %420 > %424 = fmul float %421, %421 > %425 = call float @llvm.AMDGPU.clamp.(float %214, float 0.000000e+00, float 1.000000e+00) > %426 = call float @llvm.AMDGPU.clamp.(float %215, float 0.000000e+00, float 1.000000e+00) > %427 = call float @llvm.AMDGPU.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) > %428 = fmul float %425, %422 > %429 = fmul float %426, %423 > %430 = fmul float %427, %424 > %431 = fmul float %428, 0x3FC3333340000000 > %432 = fmul float %429, 0x3FC3333340000000 > %433 = fmul float %430, 0x3FC3333340000000 > %.129 = select i1 %391, float %392, float %431 > %temp48.3 = select i1 %391, float %393, float %432 > %.130 = select i1 %391, float %394, float %433 > %434 = fadd float %264, -5.000000e-01 > %435 = call float @llvm.AMDGPU.clamp.(float %434, float 0.000000e+00, float 1.000000e+00) > %436 = fmul float %435, %.129 > %437 = fmul float %435, %temp48.3 > %438 = fmul float %435, %.130 > %439 = fsub float 2.500000e-01, %284 > %440 = fsub float 1.000000e+00, %284 > %441 = call float @llvm.AMDGPU.clamp.(float %439, float 0.000000e+00, float 1.000000e+00) > %442 = call float @llvm.AMDGPU.clamp.(float %440, float 0.000000e+00, float 1.000000e+00) > %443 = call float @llvm.fma.f32(float %436, float %441, float %387) > %444 = call float @llvm.fma.f32(float %437, float %441, float %388) > %445 = call float @llvm.fma.f32(float %438, float %441, float %389) > %446 = call float @llvm.minnum.f32(float %264, float 5.000000e-01) > %447 = fmul float %446, %.129 > %448 = fmul float %446, 
%temp48.3 > %449 = fmul float %446, %.130 > %450 = fmul float %442, %447 > %451 = fmul float %442, %448 > %452 = fmul float %442, %449 > %453 = fadd float %284, 2.500000e-01 > %454 = call float @llvm.AMDGPU.clamp.(float %453, float 0.000000e+00, float 1.000000e+00) > %455 = call float @llvm.fma.f32(float %450, float %454, float %443) > %456 = call float @llvm.fma.f32(float %451, float %454, float %444) > %457 = call float @llvm.fma.f32(float %452, float %454, float %445) > br label %ENDIF112 > >ENDIF112: ; preds = %ENDIF100, %IF113 > %temp36.0 = phi float [ %455, %IF113 ], [ %387, %ENDIF100 ] > %temp37.0 = phi float [ %456, %IF113 ], [ %388, %ENDIF100 ] > %temp38.0 = phi float [ %457, %IF113 ], [ %389, %ENDIF100 ] > %458 = fmul float %.127, %268 > %459 = fmul float %temp48.1, %268 > %460 = fmul float %.128, %268 > %461 = call float @llvm.fma.f32(float %165, float %temp36.0, float %458) > %462 = call float @llvm.fma.f32(float %165, float %temp37.0, float %459) > %463 = call float @llvm.fma.f32(float %165, float %temp38.0, float %460) > %464 = fadd float %278, 0x3FD54FDF40000000 > %465 = call float @llvm.AMDGPU.clamp.(float %464, float 0.000000e+00, float 1.000000e+00) > %466 = fmul float %465, %461 > %467 = fmul float %465, %462 > %468 = fmul float %465, %463 > %469 = bitcast float %5 to i32 > %470 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %469, 10 > %471 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %470, float %466, 11 > %472 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %471, float %467, 12 > %473 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %472, float %468, 13 > %474 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %473, float 1.000000e+00, 14 > %475 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %474, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %475 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float 
@llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL CONST[1][0..18] >DCL TEMP[0..18], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 64, 80, 96} >IMM[2] UINT32 {256, 288, 240, 0} >IMM[3] FLT32 { -1.0000, 4096.0000, 0.0040, 0.3000} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 { 0.1250, 0.5098, -0.5000, 0.5000} >IMM[6] FLT32 { 0.2500, 1.0000, 0.1500, 0.3330} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: MOV TEMP[3].w, IMM[0].zzzz > 17: DP4 TEMP[1].x, CONST[1][4], TEMP[3] > 18: DP4 TEMP[2].x, CONST[1][5], TEMP[3] > 19: MOV TEMP[1].y, TEMP[2].xxxx > 20: DP4 TEMP[2].x, CONST[1][6], TEMP[3] > 21: MOV TEMP[3].y, TEMP[2].xxxx > 22: MOV TEMP[2].xy, TEMP[1].xyyy > 23: MOV TEMP[2].w, IMM[0].xxxx > 24: TXL TEMP[2], TEMP[2], SAMP[1], 2D > 25: MOV TEMP[3].x, IMM[0].xxxx > 26: MOV TEMP[4].xy, TEMP[3].xyyy > 27: MOV TEMP[4].w, IMM[0].xxxx > 28: TXL TEMP[4], TEMP[4], SAMP[2], 2D > 29: MUL TEMP[3], TEMP[4], TEMP[2] > 30: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[1][16].xyzz > 31: MOV TEMP[2].xy, TEMP[0].xyyy > 32: TEX TEMP[2], TEMP[2], SAMP[3], 2D > 33: MOV TEMP[4].xyz, TEMP[2] > 34: MUL TEMP[5].xyz, TEMP[1].xyzz, TEMP[2].xyzz > 35: DP3 TEMP[6].x, IN[2].xyzz, IN[2].xyzz > 36: RSQ TEMP[6].x, TEMP[6].xxxx > 37: MOV TEMP[7].xy, TEMP[0].xyyy > 38: TEX TEMP[7], TEMP[7], SAMP[4], 2D > 39: FMA TEMP[8].xyz, TEMP[7].xyzz, IMM[0].wwww, IMM[3].xxxx > 40: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz > 41: RSQ TEMP[9].x, TEMP[9].xxxx > 42: MUL TEMP[8].xyz, TEMP[9].xxxx, TEMP[8].xyzz > 43: MOV TEMP[9].xy, TEMP[0].xyyy > 44: TEX TEMP[9], TEMP[9], SAMP[5], 2D > 45: MOV TEMP[10], TEMP[9].wxyz > 46: MUL TEMP[3], TEMP[3], CONST[1][18] > 47: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx > 48: FMA TEMP[11].x, TEMP[11].xxxx, IMM[3].yyyy, IMM[3].zzzz > 49: MOV TEMP[12].x, 
TEMP[11].xxxx > 50: MOV TEMP[13].xy, TEMP[0].xyyy > 51: MOV TEMP[13].w, IMM[0].xxxx > 52: TXL TEMP[13].xy, TEMP[13], SAMP[6], 2D > 53: DP3 TEMP[14].x, TEMP[8].xyzz, -CONST[1][15].xyzz > 54: FSGE TEMP[15].x, TEMP[14].xxxx, IMM[0].xxxx > 55: AND TEMP[15].x, TEMP[15].xxxx, IMM[4].xxxx > 56: INEG TEMP[15].x, TEMP[15].xxxx > 57: FMA TEMP[16].xyz, IN[2].xyzz, TEMP[6].xxxx, -CONST[1][15].xyzz > 58: DP3 TEMP[17].x, TEMP[16].xyzz, TEMP[16].xyzz > 59: RSQ TEMP[17].x, TEMP[17].xxxx > 60: MUL TEMP[16].xyz, TEMP[17].xxxx, TEMP[16].xyzz > 61: DP3 TEMP[17].x, TEMP[8].xyzz, TEMP[16].xyzz > 62: MOV_SAT TEMP[17].x, TEMP[17].xxxx > 63: MOV TEMP[12].y, TEMP[17].xxxx > 64: FMA TEMP[6].xyz, IN[2].xyzz, TEMP[6].xxxx, CONST[1][15].xyzz > 65: DP3 TEMP[17].x, TEMP[6].xyzz, TEMP[6].xyzz > 66: RSQ TEMP[17].x, TEMP[17].xxxx > 67: MUL TEMP[6].xyz, TEMP[17].xxxx, TEMP[6].xyzz > 68: DP3 TEMP[17].x, TEMP[8].xyzz, TEMP[6].xyzz > 69: MOV_SAT TEMP[17].x, TEMP[17].xxxx > 70: MOV TEMP[8].y, TEMP[17].xxxx > 71: MUL TEMP[8].x, TEMP[11].xxxx, IMM[3].wwww > 72: MUL TEMP[11].xyz, TEMP[2].xyzz, TEMP[9].yyyy > 73: ADD TEMP[11].xyz, TEMP[11].xyzz, TEMP[11].xyzz > 74: USNE TEMP[17].x, TEMP[15].xxxx, IMM[1].xxxx > 75: UIF TEMP[17].xxxx :0 > 76: MOV TEMP[17].x, TEMP[9].yyyy > 77: ELSE :0 > 78: MOV TEMP[17].x, TEMP[11].xxxx > 79: ENDIF > 80: MOV TEMP[17].x, TEMP[17].xxxx > 81: USNE TEMP[18].x, TEMP[15].xxxx, IMM[1].xxxx > 82: UIF TEMP[18].xxxx :0 > 83: MOV TEMP[18].x, TEMP[9].yyyy > 84: ELSE :0 > 85: MOV TEMP[18].x, TEMP[11].yyyy > 86: ENDIF > 87: MOV TEMP[17].y, TEMP[18].xxxx > 88: USNE TEMP[18].x, TEMP[15].xxxx, IMM[1].xxxx > 89: UIF TEMP[18].xxxx :0 > 90: MOV TEMP[18].x, TEMP[9].yyyy > 91: ELSE :0 > 92: MOV TEMP[18].x, TEMP[11].zzzz > 93: ENDIF > 94: MOV TEMP[17].z, TEMP[18].xxxx > 95: USNE TEMP[11].x, TEMP[15].xxxx, IMM[1].xxxx > 96: UIF TEMP[11].xxxx :0 > 97: MOV TEMP[11].x, TEMP[16].xxxx > 98: ELSE :0 > 99: MOV TEMP[11].x, TEMP[6].xxxx >100: ENDIF >101: MOV TEMP[11].x, TEMP[11].xxxx >102: USNE TEMP[18].x, TEMP[15].xxxx, IMM[1].xxxx >103: UIF TEMP[18].xxxx :0 >104: MOV TEMP[18].x, TEMP[16].yyyy >105: ELSE :0 >106: MOV TEMP[18].x, TEMP[6].yyyy >107: ENDIF >108: MOV TEMP[11].y, TEMP[18].xxxx >109: USNE TEMP[18].x, TEMP[15].xxxx, IMM[1].xxxx >110: UIF TEMP[18].xxxx :0 >111: MOV TEMP[18].x, TEMP[16].zzzz >112: ELSE :0 >113: MOV TEMP[18].x, TEMP[6].zzzz >114: ENDIF >115: MOV TEMP[11].z, TEMP[18].xxxx >116: MOV TEMP[16].xyz, TEMP[11].xyzx >117: USNE TEMP[6].x, TEMP[15].xxxx, IMM[1].xxxx >118: UIF TEMP[6].xxxx :0 >119: MOV TEMP[6].x, TEMP[12].xxxx >120: ELSE :0 >121: MOV TEMP[6].x, TEMP[8].xxxx >122: ENDIF >123: MOV TEMP[6].x, TEMP[6].xxxx >124: USNE TEMP[18].x, TEMP[15].xxxx, IMM[1].xxxx >125: UIF TEMP[18].xxxx :0 >126: MOV TEMP[12].x, TEMP[12].yyyy >127: ELSE :0 >128: MOV TEMP[12].x, TEMP[8].yyyy >129: ENDIF >130: MOV TEMP[6].y, TEMP[12].xxxx >131: ADD TEMP[12].x, TEMP[6].xxxx, IMM[0].wwww >132: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].xxxx >133: LG2 TEMP[18].x, TEMP[6].yyyy >134: MUL TEMP[6].x, TEMP[18].xxxx, TEMP[6].xxxx >135: EX2 TEMP[6].x, TEMP[6].xxxx >136: MUL TEMP[6].x, TEMP[12].xxxx, TEMP[6].xxxx >137: ADD TEMP[12].x, -TEMP[9].zzzz, IMM[0].zzzz >138: DP3 TEMP[11].x, -CONST[1][15].xyzz, TEMP[11].xyzz >139: MOV_SAT TEMP[11].x, TEMP[11].xxxx >140: ADD TEMP[8].x, -TEMP[11].xxxx, IMM[0].zzzz >141: MUL TEMP[11].x, TEMP[8].xxxx, TEMP[8].xxxx >142: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[11].xxxx >143: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[8].xxxx >144: FMA TEMP[11].x, TEMP[12].xxxx, TEMP[8].xxxx, TEMP[9].zzzz >145: MUL TEMP[6].x, TEMP[6].xxxx, 
TEMP[11].xxxx >146: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[13].xxxx >147: MUL TEMP[3].xyz, TEMP[17].xyzz, TEMP[3].xyzz >148: ADD TEMP[0].x, TEMP[2].wwww, TEMP[14].xxxx >149: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx >150: FSNE TEMP[11].x, TEMP[2].wwww, IMM[0].xxxx >151: UIF TEMP[11].xxxx :0 >152: RCP TEMP[11].x, TEMP[2].wwww >153: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx >154: ELSE :0 >155: SSG TEMP[12].x, TEMP[0].xxxx >156: MUL TEMP[11].x, IMM[0].yyyy, TEMP[12].xxxx >157: ENDIF >158: MOV_SAT TEMP[11].x, TEMP[11].xxxx >159: FMA TEMP[6].xyz, TEMP[3].xyzz, TEMP[6].xxxx, TEMP[5].xyzz >160: MUL TEMP[8].xyz, TEMP[11].xxxx, TEMP[6].xyzz >161: ADD TEMP[0].x, TEMP[2].wwww, -TEMP[14].xxxx >162: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx >163: FSNE TEMP[11].x, TEMP[2].wwww, IMM[0].xxxx >164: UIF TEMP[11].xxxx :0 >165: RCP TEMP[11].x, TEMP[2].wwww >166: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx >167: ELSE :0 >168: SSG TEMP[12].x, TEMP[0].xxxx >169: MUL TEMP[11].x, IMM[0].yyyy, TEMP[12].xxxx >170: ENDIF >171: MOV_SAT TEMP[11].x, TEMP[11].xxxx >172: MUL TEMP[3].xyz, TEMP[11].xxxx, TEMP[6].xyzz >173: AND TEMP[6].xyz, TEMP[15].xxxx, TEMP[8].xyzz >174: MOV TEMP[8].xyz, TEMP[6].xyzx >175: USNE TEMP[11].x, TEMP[15].xxxx, IMM[1].xxxx >176: UIF TEMP[11].xxxx :0 >177: MOV TEMP[11].x, IMM[1].xxxx >178: ELSE :0 >179: MOV TEMP[11].x, TEMP[3].xxxx >180: ENDIF >181: MOV TEMP[11].x, TEMP[11].xxxx >182: USNE TEMP[12].x, TEMP[15].xxxx, IMM[1].xxxx >183: UIF TEMP[12].xxxx :0 >184: MOV TEMP[12].x, IMM[1].xxxx >185: ELSE :0 >186: MOV TEMP[12].x, TEMP[3].yyyy >187: ENDIF >188: MOV TEMP[11].y, TEMP[12].xxxx >189: USNE TEMP[12].x, TEMP[15].xxxx, IMM[1].xxxx >190: UIF TEMP[12].xxxx :0 >191: MOV TEMP[12].x, IMM[1].xxxx >192: ELSE :0 >193: MOV TEMP[12].x, TEMP[3].zzzz >194: ENDIF >195: MOV TEMP[11].z, TEMP[12].xxxx >196: FSLT TEMP[12].x, IMM[0].xxxx, TEMP[9].wwww >197: AND TEMP[12].x, TEMP[12].xxxx, IMM[4].xxxx >198: INEG TEMP[12].x, TEMP[12].xxxx >199: MOV TEMP[0].x, TEMP[12].xxxx >200: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >201: UIF TEMP[12].xxxx :0 >202: FSLT TEMP[7].x, TEMP[7].wwww, IMM[5].yyyy >203: AND TEMP[7].x, TEMP[7].xxxx, IMM[4].xxxx >204: INEG TEMP[7].x, TEMP[7].xxxx >205: MOV TEMP[0].x, TEMP[7].xxxx >206: ADD TEMP[12].xyz, TEMP[5].xyzz, TEMP[5].xyzz >207: MOV TEMP[10].yzw, TEMP[12].yxyz >208: ADD TEMP[15].x, TEMP[9].wwww, IMM[5].zzzz >209: MOV_SAT TEMP[15].x, TEMP[15].xxxx >210: MUL TEMP[5].xyz, TEMP[15].xxxx, TEMP[12].xyzz >211: ADD TEMP[12].xy, -TEMP[14].xxxx, IMM[6].xyyy >212: MOV_SAT TEMP[12].xy, TEMP[12].xyyy >213: FMA TEMP[5].xyz, TEMP[5].xyzz, TEMP[12].xxxx, TEMP[6].xyzz >214: MIN TEMP[16].x, TEMP[9].wwww, IMM[5].wwww >215: MAX TEMP[9].x, TEMP[2].zzzz, TEMP[2].yyyy >216: MAX TEMP[9].x, TEMP[9].xxxx, TEMP[2].xxxx >217: FSEQ TEMP[15].xyz, TEMP[9].xxxx, IMM[0].xxxx >218: SSG TEMP[17].xyz, TEMP[2].xyzz >219: MUL TEMP[17].xyz, IMM[0].yyyy, TEMP[17].xyzz >220: RCP TEMP[9].xyz, TEMP[9].xxxx >221: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xyzz >222: UCMP TEMP[2].xyz, TEMP[15].xyzz, TEMP[17].xyzz, TEMP[2].xyzz >223: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz >224: MUL TEMP[4].xyz, TEMP[2].xyzz, TEMP[2].xyzz >225: MOV_SAT TEMP[2].xyz, TEMP[1].xyzz >226: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xyzz >227: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[6].zzzz >228: MOV TEMP[16].yzw, TEMP[1].yxyz >229: USNE TEMP[1].x, TEMP[7].xxxx, IMM[1].xxxx >230: UIF TEMP[1].xxxx :0 >231: MOV TEMP[1].x, TEMP[5].xxxx >232: ELSE :0 >233: MOV TEMP[1].x, TEMP[6].xxxx >234: ENDIF >235: MOV TEMP[1].x, TEMP[1].xxxx >236: USNE TEMP[2].x, TEMP[7].xxxx, 
IMM[1].xxxx >237: UIF TEMP[2].xxxx :0 >238: MOV TEMP[2].x, TEMP[5].yyyy >239: ELSE :0 >240: MOV TEMP[2].x, TEMP[6].yyyy >241: ENDIF >242: MOV TEMP[1].y, TEMP[2].xxxx >243: USNE TEMP[2].x, TEMP[7].xxxx, IMM[1].xxxx >244: UIF TEMP[2].xxxx :0 >245: MOV TEMP[2].x, TEMP[5].zzzz >246: ELSE :0 >247: MOV TEMP[2].x, TEMP[6].zzzz >248: ENDIF >249: MOV TEMP[1].z, TEMP[2].xxxx >250: USNE TEMP[2].x, TEMP[7].xxxx, IMM[1].xxxx >251: UIF TEMP[2].xxxx :0 >252: MOV TEMP[2].x, TEMP[10].xxxx >253: ELSE :0 >254: MOV TEMP[2].x, TEMP[16].xxxx >255: ENDIF >256: MOV TEMP[2].x, TEMP[2].xxxx >257: USNE TEMP[5].x, TEMP[7].xxxx, IMM[1].xxxx >258: UIF TEMP[5].xxxx :0 >259: MOV TEMP[5].x, TEMP[10].yyyy >260: ELSE :0 >261: MOV TEMP[5].x, TEMP[16].yyyy >262: ENDIF >263: MOV TEMP[2].y, TEMP[5].xxxx >264: USNE TEMP[5].x, TEMP[7].xxxx, IMM[1].xxxx >265: UIF TEMP[5].xxxx :0 >266: MOV TEMP[5].x, TEMP[10].zzzz >267: ELSE :0 >268: MOV TEMP[5].x, TEMP[16].zzzz >269: ENDIF >270: MOV TEMP[2].z, TEMP[5].xxxx >271: USNE TEMP[5].x, TEMP[7].xxxx, IMM[1].xxxx >272: UIF TEMP[5].xxxx :0 >273: MOV TEMP[5].x, TEMP[10].wwww >274: ELSE :0 >275: MOV TEMP[5].x, TEMP[16].wwww >276: ENDIF >277: MOV TEMP[2].w, TEMP[5].xxxx >278: MIN TEMP[5].x, TEMP[2].xxxx, IMM[5].wwww >279: MUL TEMP[4].xyz, TEMP[5].xxxx, TEMP[2].yzww >280: MUL TEMP[4].xyz, TEMP[12].yyyy, TEMP[4].xyzz >281: ADD TEMP[2].x, TEMP[14].xxxx, IMM[6].xxxx >282: MOV_SAT TEMP[0].x, TEMP[2].xxxx >283: FMA TEMP[8].xyz, TEMP[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >284: ENDIF >285: FMA TEMP[0].xyz, TEMP[3].wwww, TEMP[11].xyzz, TEMP[8].xyzz >286: ADD TEMP[1].x, TEMP[13].yyyy, IMM[6].wwww >287: MOV_SAT TEMP[1].x, TEMP[1].xxxx >288: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >289: MOV TEMP[0].w, IMM[0].zzzz >290: MOV OUT[0], TEMP[0] >291: END >radeonsi: Compiling shader 83 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %40 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 3 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 7 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 > %67 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %68 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %67, i64 0, i64 11 > %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 > %70 = extractelement <8 x i32> %66, i32 7 > %71 = extractelement <4 x i32> %69, i32 0 > %72 = and i32 %71, %70 > %73 = insertelement <4 x i32> %69, i32 %72, i32 0 > %74 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 > %76 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %77 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %76, i64 0, i64 15 > %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 > %79 = extractelement <8 x i32> %75, i32 7 > %80 = extractelement <4 x i32> %78, i32 0 > %81 = and i32 %80, %79 > %82 = insertelement <4 x i32> %78, i32 %81, i32 0 > %83 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0 > %85 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %86 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %85, i64 0, i64 19 > %87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !tbaa !0 > %88 = extractelement <8 x i32> %84, i32 7 > %89 = extractelement <4 x i32> %87, i32 0 > %90 = and i32 %89, %88 > %91 = insertelement <4 x i32> %87, i32 %90, i32 0 > %92 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %93 = load <8 x i32>, <8 x i32> addrspace(2)* %92, align 32, !tbaa !0 > %94 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %95 = getelementptr [0 x <4 x i32>], [0 x 
<4 x i32>] addrspace(2)* %94, i64 0, i64 23 > %96 = load <4 x i32>, <4 x i32> addrspace(2)* %95, align 16, !tbaa !0 > %97 = extractelement <8 x i32> %93, i32 7 > %98 = extractelement <4 x i32> %96, i32 0 > %99 = and i32 %98, %97 > %100 = insertelement <4 x i32> %96, i32 %99, i32 0 > %101 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %102 = load <8 x i32>, <8 x i32> addrspace(2)* %101, align 32, !tbaa !0 > %103 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %104 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %103, i64 0, i64 27 > %105 = load <4 x i32>, <4 x i32> addrspace(2)* %104, align 16, !tbaa !0 > %106 = extractelement <8 x i32> %102, i32 7 > %107 = extractelement <4 x i32> %105, i32 0 > %108 = and i32 %107, %106 > %109 = insertelement <4 x i32> %105, i32 %108, i32 0 > %110 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %111 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %112 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %115 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %116 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %117 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %118 = fcmp oeq float %112, 0.000000e+00 > %119 = fcmp oeq float %112, 0.000000e+00 > %120 = fcmp ogt float %110, 0.000000e+00 > %121 = select i1 %120, float 1.000000e+00, float %110 > %122 = fcmp oge float %121, 0.000000e+00 > %123 = fcmp ogt float %111, 0.000000e+00 > %124 = select i1 %123, float 1.000000e+00, float %111 > %125 = fcmp oge float %124, 0.000000e+00 > %.op = fmul float %121, 0x4600000000000000 > %126 = select i1 %122, float %.op, float 0xC600000000000000 > %.op145 = fmul float %124, 0x4600000000000000 > %127 = select i1 %125, float %.op145, float 0xC600000000000000 > %128 = fdiv float 1.000000e+00, %112 > %129 = fmul float %110, %128 > %130 = fmul float %111, %128 > %131 = select i1 %118, float %126, float %129 > %132 = select i1 %119, float %127, float %130 > %133 = fcmp oeq float %112, 0.000000e+00 > %134 = fcmp oeq float %112, 0.000000e+00 > %135 = fcmp ogt float %113, 0.000000e+00 > %136 = select i1 %135, float 1.000000e+00, float %113 > %137 = fcmp oge float %136, 0.000000e+00 > %138 = fcmp ogt float %114, 0.000000e+00 > %139 = select i1 %138, float 1.000000e+00, float %114 > %140 = fcmp oge float %139, 0.000000e+00 > %.op146 = fmul float %136, 0x4600000000000000 > %141 = select i1 %137, float %.op146, float 0xC600000000000000 > %.op147 = fmul float %139, 0x4600000000000000 > %142 = select i1 %140, float %.op147, float 0xC600000000000000 > %143 = fdiv float 1.000000e+00, %112 > %144 = fmul float %113, %143 > %145 = fmul float %114, %143 > %146 = select i1 %133, float %141, float %144 > %147 = select i1 %134, float %142, float %145 > %148 = bitcast float %131 to i32 > %149 = bitcast float %132 to i32 > %150 = insertelement <2 x i32> undef, i32 %148, i32 0 > %151 = insertelement <2 x i32> %150, i32 %149, i32 1 > %152 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %151, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %153 = extractelement <4 x float> %152, i32 0 > %154 = fmul float %146, %153 > %155 = fmul float %147, %153 > %156 = fmul float %25, %154 > %157 = fmul float %26, 
%155 > %158 = fadd float %156, %157 > %159 = fmul float %27, %153 > %160 = fadd float %158, %159 > %161 = fadd float %160, %28 > %162 = fmul float %29, %154 > %163 = fmul float %30, %155 > %164 = fadd float %162, %163 > %165 = fmul float %31, %153 > %166 = fadd float %164, %165 > %167 = fadd float %166, %32 > %168 = fmul float %33, %154 > %169 = fmul float %34, %155 > %170 = fadd float %168, %169 > %171 = fmul float %35, %153 > %172 = fadd float %170, %171 > %173 = fadd float %172, %36 > %174 = bitcast float %161 to i32 > %175 = bitcast float %167 to i32 > %176 = insertelement <4 x i32> undef, i32 %174, i32 0 > %177 = insertelement <4 x i32> %176, i32 %175, i32 1 > %178 = insertelement <4 x i32> %177, i32 0, i32 2 > %179 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %178, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %180 = extractelement <4 x float> %179, i32 0 > %181 = extractelement <4 x float> %179, i32 1 > %182 = extractelement <4 x float> %179, i32 2 > %183 = extractelement <4 x float> %179, i32 3 > %184 = bitcast float %173 to i32 > %185 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %184, i32 1 > %186 = insertelement <4 x i32> %185, i32 0, i32 2 > %187 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %186, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %188 = extractelement <4 x float> %187, i32 0 > %189 = extractelement <4 x float> %187, i32 1 > %190 = extractelement <4 x float> %187, i32 2 > %191 = extractelement <4 x float> %187, i32 3 > %192 = fmul float %188, %180 > %193 = fmul float %189, %181 > %194 = fmul float %190, %182 > %195 = fmul float %191, %183 > %196 = fmul float %192, %40 > %197 = fmul float %193, %41 > %198 = fmul float %194, %42 > %199 = bitcast float %131 to i32 > %200 = bitcast float %132 to i32 > %201 = insertelement <2 x i32> undef, i32 %199, i32 0 > %202 = insertelement <2 x i32> %201, i32 %200, i32 1 > %203 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %202, <8 x i32> %75, <4 x i32> %82, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %204 = extractelement <4 x float> %203, i32 0 > %205 = extractelement <4 x float> %203, i32 1 > %206 = extractelement <4 x float> %203, i32 2 > %207 = extractelement <4 x float> %203, i32 3 > %208 = fmul float %196, %204 > %209 = fmul float %197, %205 > %210 = fmul float %198, %206 > %211 = fmul float %115, %115 > %212 = fmul float %116, %116 > %213 = fadd float %212, %211 > %214 = fmul float %117, %117 > %215 = fadd float %213, %214 > %216 = call float @llvm.AMDGPU.rsq.clamped.f32(float %215) > %217 = bitcast float %131 to i32 > %218 = bitcast float %132 to i32 > %219 = insertelement <2 x i32> undef, i32 %217, i32 0 > %220 = insertelement <2 x i32> %219, i32 %218, i32 1 > %221 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %220, <8 x i32> %84, <4 x i32> %91, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %222 = extractelement <4 x float> %221, i32 0 > %223 = extractelement <4 x float> %221, i32 1 > %224 = extractelement <4 x float> %221, i32 2 > %225 = extractelement <4 x float> %221, i32 3 > %226 = call float @llvm.fma.f32(float %222, float 2.000000e+00, float -1.000000e+00) > %227 = call float @llvm.fma.f32(float %223, float 2.000000e+00, float -1.000000e+00) > %228 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float -1.000000e+00) > %229 = fmul float %226, %226 > %230 = fmul float %227, %227 > %231 = fadd float %230, %229 > %232 = fmul 
float %228, %228 > %233 = fadd float %231, %232 > %234 = call float @llvm.AMDGPU.rsq.clamped.f32(float %233) > %235 = fmul float %234, %226 > %236 = fmul float %234, %227 > %237 = fmul float %234, %228 > %238 = bitcast float %131 to i32 > %239 = bitcast float %132 to i32 > %240 = insertelement <2 x i32> undef, i32 %238, i32 0 > %241 = insertelement <2 x i32> %240, i32 %239, i32 1 > %242 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %241, <8 x i32> %93, <4 x i32> %100, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %243 = extractelement <4 x float> %242, i32 0 > %244 = extractelement <4 x float> %242, i32 1 > %245 = extractelement <4 x float> %242, i32 2 > %246 = extractelement <4 x float> %242, i32 3 > %247 = fmul float %192, %43 > %248 = fmul float %193, %44 > %249 = fmul float %194, %45 > %250 = fmul float %195, %46 > %251 = fmul float %243, %243 > %252 = call float @llvm.fma.f32(float %251, float 4.096000e+03, float 0x3F70624DE0000000) > %253 = bitcast float %131 to i32 > %254 = bitcast float %132 to i32 > %255 = insertelement <4 x i32> undef, i32 %253, i32 0 > %256 = insertelement <4 x i32> %255, i32 %254, i32 1 > %257 = insertelement <4 x i32> %256, i32 0, i32 2 > %258 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %257, <8 x i32> %102, <4 x i32> %109, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %259 = extractelement <4 x float> %258, i32 0 > %260 = extractelement <4 x float> %258, i32 1 > %261 = fmul float %37, %235 > %262 = fsub float -0.000000e+00, %261 > %263 = fmul float %38, %236 > %264 = fsub float %262, %263 > %265 = fmul float %39, %237 > %266 = fsub float %264, %265 > %267 = fcmp oge float %266, 0.000000e+00 > %268 = fsub float -0.000000e+00, %37 > %269 = call float @llvm.fma.f32(float %115, float %216, float %268) > %270 = fsub float -0.000000e+00, %38 > %271 = call float @llvm.fma.f32(float %116, float %216, float %270) > %272 = fsub float -0.000000e+00, %39 > %273 = call float @llvm.fma.f32(float %117, float %216, float %272) > %274 = fmul float %269, %269 > %275 = fmul float %271, %271 > %276 = fadd float %275, %274 > %277 = fmul float %273, %273 > %278 = fadd float %276, %277 > %279 = call float @llvm.AMDGPU.rsq.clamped.f32(float %278) > %280 = fmul float %279, %269 > %281 = fmul float %279, %271 > %282 = fmul float %279, %273 > %283 = fmul float %235, %280 > %284 = fmul float %236, %281 > %285 = fadd float %284, %283 > %286 = fmul float %237, %282 > %287 = fadd float %285, %286 > %288 = call float @llvm.AMDGPU.clamp.(float %287, float 0.000000e+00, float 1.000000e+00) > %289 = call float @llvm.fma.f32(float %115, float %216, float %37) > %290 = call float @llvm.fma.f32(float %116, float %216, float %38) > %291 = call float @llvm.fma.f32(float %117, float %216, float %39) > %292 = fmul float %289, %289 > %293 = fmul float %290, %290 > %294 = fadd float %293, %292 > %295 = fmul float %291, %291 > %296 = fadd float %294, %295 > %297 = call float @llvm.AMDGPU.rsq.clamped.f32(float %296) > %298 = fmul float %297, %289 > %299 = fmul float %297, %290 > %300 = fmul float %297, %291 > %301 = fmul float %235, %298 > %302 = fmul float %236, %299 > %303 = fadd float %302, %301 > %304 = fmul float %237, %300 > %305 = fadd float %303, %304 > %306 = call float @llvm.AMDGPU.clamp.(float %305, float 0.000000e+00, float 1.000000e+00) > %307 = fmul float %252, 0x3FD3333340000000 > %308 = fmul float %204, %244 > %309 = fmul float %205, %244 > %310 = fmul float %206, %244 > %311 = fadd float %308, %308 > %312 = fadd float %309, %309 > %313 
= fadd float %310, %310 > %. = select i1 %267, float %244, float %311 > %temp72.0 = select i1 %267, float %244, float %312 > %.136 = select i1 %267, float %244, float %313 > %temp44.0 = select i1 %267, float %280, float %298 > %.137 = select i1 %267, float %281, float %299 > %temp72.3 = select i1 %267, float %282, float %300 > %.138 = select i1 %267, float %252, float %307 > %temp48.0 = select i1 %267, float %288, float %306 > %314 = fadd float %.138, 2.000000e+00 > %315 = fmul float %314, 1.250000e-01 > %316 = call float @llvm.log2.f32(float %temp48.0) > %317 = fmul float %316, %.138 > %318 = call float @llvm.exp2.f32(float %317) > %319 = fmul float %315, %318 > %320 = fsub float 1.000000e+00, %245 > %321 = fmul float %37, %temp44.0 > %322 = fsub float -0.000000e+00, %321 > %323 = fmul float %38, %.137 > %324 = fsub float %322, %323 > %325 = fmul float %39, %temp72.3 > %326 = fsub float %324, %325 > %327 = call float @llvm.AMDGPU.clamp.(float %326, float 0.000000e+00, float 1.000000e+00) > %328 = fsub float 1.000000e+00, %327 > %329 = fmul float %328, %328 > %330 = fmul float %329, %329 > %331 = fmul float %330, %328 > %332 = call float @llvm.fma.f32(float %320, float %331, float %245) > %333 = fmul float %319, %332 > %334 = fmul float %247, %259 > %335 = fmul float %248, %259 > %336 = fmul float %249, %259 > %337 = fmul float %., %334 > %338 = fmul float %temp72.0, %335 > %339 = fmul float %.136, %336 > %340 = fadd float %207, %266 > %341 = fadd float %340, -1.000000e+00 > %342 = fcmp une float %207, 0.000000e+00 > br i1 %342, label %IF98, label %ELSE99 > >IF98: ; preds = %main_body > %343 = fdiv float 1.000000e+00, %207 > %344 = fmul float %341, %343 > br label %ENDIF97 > >ELSE99: ; preds = %main_body > %345 = fcmp ogt float %341, 0.000000e+00 > %346 = select i1 %345, float 1.000000e+00, float %341 > %347 = fcmp oge float %346, 0.000000e+00 > %.op148 = fmul float %346, 0x4600000000000000 > %348 = select i1 %347, float %.op148, float 0xC600000000000000 > br label %ENDIF97 > >ENDIF97: ; preds = %ELSE99, %IF98 > %temp44.1 = phi float [ %344, %IF98 ], [ %348, %ELSE99 ] > %349 = call float @llvm.AMDGPU.clamp.(float %temp44.1, float 0.000000e+00, float 1.000000e+00) > %350 = call float @llvm.fma.f32(float %337, float %333, float %208) > %351 = call float @llvm.fma.f32(float %338, float %333, float %209) > %352 = call float @llvm.fma.f32(float %339, float %333, float %210) > %353 = fmul float %349, %350 > %354 = fmul float %349, %351 > %355 = fmul float %349, %352 > %356 = fsub float %207, %266 > %357 = fadd float %356, -1.000000e+00 > %358 = fcmp une float %207, 0.000000e+00 > br i1 %358, label %IF101, label %ELSE102 > >IF101: ; preds = %ENDIF97 > %359 = fdiv float 1.000000e+00, %207 > %360 = fmul float %357, %359 > br label %ENDIF100 > >ELSE102: ; preds = %ENDIF97 > %361 = fcmp ogt float %357, 0.000000e+00 > %362 = select i1 %361, float 1.000000e+00, float %357 > %363 = fcmp oge float %362, 0.000000e+00 > %.op149 = fmul float %362, 0x4600000000000000 > %364 = select i1 %363, float %.op149, float 0xC600000000000000 > br label %ENDIF100 > >ENDIF100: ; preds = %ELSE102, %IF101 > %temp44.2 = phi float [ %360, %IF101 ], [ %364, %ELSE102 ] > %365 = call float @llvm.AMDGPU.clamp.(float %temp44.2, float 0.000000e+00, float 1.000000e+00) > %366 = fmul float %365, %350 > %367 = fmul float %365, %351 > %368 = fmul float %365, %352 > %369 = select i1 %267, float %353, float 0.000000e+00 > %370 = select i1 %267, float %354, float 0.000000e+00 > %371 = select i1 %267, float %355, float 0.000000e+00 > 
%.139 = select i1 %267, float 0.000000e+00, float %366 > %temp48.1 = select i1 %267, float 0.000000e+00, float %367 > %.140 = select i1 %267, float 0.000000e+00, float %368 > %372 = fcmp ogt float %246, 0.000000e+00 > br i1 %372, label %IF113, label %ENDIF112 > >IF113: ; preds = %ENDIF100 > %373 = fcmp olt float %225, 0x3FE0505060000000 > %374 = fadd float %208, %208 > %375 = fadd float %209, %209 > %376 = fadd float %210, %210 > %377 = fadd float %246, -5.000000e-01 > %378 = call float @llvm.AMDGPU.clamp.(float %377, float 0.000000e+00, float 1.000000e+00) > %379 = fmul float %378, %374 > %380 = fmul float %378, %375 > %381 = fmul float %378, %376 > %382 = fsub float 2.500000e-01, %266 > %383 = fsub float 1.000000e+00, %266 > %384 = call float @llvm.AMDGPU.clamp.(float %382, float 0.000000e+00, float 1.000000e+00) > %385 = call float @llvm.AMDGPU.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) > %386 = call float @llvm.fma.f32(float %379, float %384, float %369) > %387 = call float @llvm.fma.f32(float %380, float %384, float %370) > %388 = call float @llvm.fma.f32(float %381, float %384, float %371) > %389 = call float @llvm.minnum.f32(float %246, float 5.000000e-01) > %390 = call float @llvm.maxnum.f32(float %206, float %205) > %391 = call float @llvm.maxnum.f32(float %390, float %204) > %392 = fcmp oeq float %391, 0.000000e+00 > %393 = fcmp oeq float %391, 0.000000e+00 > %394 = fcmp oeq float %391, 0.000000e+00 > %395 = fcmp ogt float %204, 0.000000e+00 > %396 = select i1 %395, float 1.000000e+00, float %204 > %397 = fcmp oge float %396, 0.000000e+00 > %398 = fcmp ogt float %205, 0.000000e+00 > %399 = select i1 %398, float 1.000000e+00, float %205 > %400 = fcmp oge float %399, 0.000000e+00 > %401 = fcmp ogt float %206, 0.000000e+00 > %402 = select i1 %401, float 1.000000e+00, float %206 > %403 = fcmp oge float %402, 0.000000e+00 > %.op150 = fmul float %396, 0x4600000000000000 > %404 = select i1 %397, float %.op150, float 0xC600000000000000 > %.op151 = fmul float %399, 0x4600000000000000 > %405 = select i1 %400, float %.op151, float 0xC600000000000000 > %.op152 = fmul float %402, 0x4600000000000000 > %406 = select i1 %403, float %.op152, float 0xC600000000000000 > %407 = fdiv float 1.000000e+00, %391 > %408 = fmul float %204, %407 > %409 = fmul float %205, %407 > %410 = fmul float %206, %407 > %411 = select i1 %392, float %404, float %408 > %412 = select i1 %393, float %405, float %409 > %413 = select i1 %394, float %406, float %410 > %414 = call float @llvm.AMDGPU.clamp.(float %411, float 0.000000e+00, float 1.000000e+00) > %415 = call float @llvm.AMDGPU.clamp.(float %412, float 0.000000e+00, float 1.000000e+00) > %416 = call float @llvm.AMDGPU.clamp.(float %413, float 0.000000e+00, float 1.000000e+00) > %417 = fmul float %414, %414 > %418 = fmul float %415, %415 > %419 = fmul float %416, %416 > %420 = call float @llvm.AMDGPU.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) > %421 = call float @llvm.AMDGPU.clamp.(float %197, float 0.000000e+00, float 1.000000e+00) > %422 = call float @llvm.AMDGPU.clamp.(float %198, float 0.000000e+00, float 1.000000e+00) > %423 = fmul float %420, %417 > %424 = fmul float %421, %418 > %425 = fmul float %422, %419 > %426 = fmul float %423, 0x3FC3333340000000 > %427 = fmul float %424, 0x3FC3333340000000 > %428 = fmul float %425, 0x3FC3333340000000 > %.141 = select i1 %373, float %386, float %369 > %temp8.0 = select i1 %373, float %387, float %370 > %.142 = select i1 %373, float %388, float %371 > %temp8.2 = select i1 %373, float %246, 
float %389 > %.143 = select i1 %373, float %374, float %426 > %temp20.1 = select i1 %373, float %375, float %427 > %.144 = select i1 %373, float %376, float %428 > %429 = call float @llvm.minnum.f32(float %temp8.2, float 5.000000e-01) > %430 = fmul float %429, %.143 > %431 = fmul float %429, %temp20.1 > %432 = fmul float %429, %.144 > %433 = fmul float %385, %430 > %434 = fmul float %385, %431 > %435 = fmul float %385, %432 > %436 = fadd float %266, 2.500000e-01 > %437 = call float @llvm.AMDGPU.clamp.(float %436, float 0.000000e+00, float 1.000000e+00) > %438 = call float @llvm.fma.f32(float %433, float %437, float %.141) > %439 = call float @llvm.fma.f32(float %434, float %437, float %temp8.0) > %440 = call float @llvm.fma.f32(float %435, float %437, float %.142) > br label %ENDIF112 > >ENDIF112: ; preds = %ENDIF100, %IF113 > %temp32.0 = phi float [ %438, %IF113 ], [ %369, %ENDIF100 ] > %temp33.0 = phi float [ %439, %IF113 ], [ %370, %ENDIF100 ] > %temp34.0 = phi float [ %440, %IF113 ], [ %371, %ENDIF100 ] > %441 = call float @llvm.fma.f32(float %250, float %.139, float %temp32.0) > %442 = call float @llvm.fma.f32(float %250, float %temp48.1, float %temp33.0) > %443 = call float @llvm.fma.f32(float %250, float %.140, float %temp34.0) > %444 = fadd float %260, 0x3FD54FDF40000000 > %445 = call float @llvm.AMDGPU.clamp.(float %444, float 0.000000e+00, float 1.000000e+00) > %446 = fmul float %445, %441 > %447 = fmul float %445, %442 > %448 = fmul float %445, %443 > %449 = bitcast float %5 to i32 > %450 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %449, 10 > %451 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %450, float %446, 11 > %452 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %451, float %447, 12 > %453 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %452, float %448, 13 > %454 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %453, float 1.000000e+00, 14 > %455 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %454, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %455 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare 
float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], CUBE, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..18] >DCL TEMP[0..16], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, -1.0000} >IMM[1] UINT32 {1, 272, 0, 64} >IMM[2] UINT32 {80, 96, 16, 32} >IMM[3] UINT32 {48, 256, 288, 0} >IMM[4] FLT32 { 4096.0000, 0.0040, 2.0040, 0.1250} >IMM[5] FLT32 { 1.0000, 0.5098, -0.5000, 0.2500} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.5000, 0.1500, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: ADD TEMP[3].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 17: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[2][0].xxxx > 18: DP3 TEMP[1].x, CONST[1][4].xyzz, TEMP[3].xyzz > 19: DP3 TEMP[2].x, CONST[1][5].xyzz, TEMP[3].xyzz > 20: MOV TEMP[1].y, TEMP[2].xxxx > 21: DP3 TEMP[2].x, CONST[1][6].xyzz, TEMP[3].xyzz > 22: MOV TEMP[1].z, TEMP[2].xxxx > 23: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz > 24: SQRT TEMP[4].x, TEMP[2].xxxx > 25: MOV TEMP[4].y, TEMP[4].xxxx > 26: MOV TEMP[4].x, IMM[0].xxxx > 27: MOV TEMP[4].xy, TEMP[4].xyyy > 28: MOV TEMP[4].w, IMM[0].xxxx > 29: TXL TEMP[4].xyz, TEMP[4], SAMP[1], 2D > 30: DP3 TEMP[5].x, CONST[2][1].xyzz, TEMP[3].xyzz > 31: DP3 TEMP[6].x, CONST[2][2].xyzz, TEMP[3].xyzz > 32: MOV TEMP[5].y, TEMP[6].xxxx > 33: DP3 TEMP[6].x, CONST[2][3].xyzz, TEMP[3].xyzz > 34: MOV TEMP[5].z, TEMP[6].xxxx > 35: MOV TEMP[6].xyz, TEMP[5].xyzz > 36: MOV TEMP[6].w, IMM[0].xxxx > 37: TXL TEMP[6], TEMP[6], SAMP[2], CUBE > 38: MUL TEMP[3].xyz, TEMP[6].xyzz, CONST[2][16].xyzz > 39: MOV TEMP[7].xy, TEMP[0].xyyy > 40: TEX TEMP[7], TEMP[7], SAMP[3], 2D > 41: MOV TEMP[5].xyz, TEMP[7] > 42: MUL TEMP[8].xyz, 
TEMP[3].xyzz, TEMP[7].xyzz > 43: DP3 TEMP[9].x, IN[2].xyzz, IN[2].xyzz > 44: RSQ TEMP[9].x, TEMP[9].xxxx > 45: MOV TEMP[10].xy, TEMP[0].xyyy > 46: TEX TEMP[10], TEMP[10], SAMP[4], 2D > 47: FMA TEMP[11].xyz, TEMP[10].xyzz, IMM[0].zzzz, IMM[0].wwww > 48: DP3 TEMP[12].x, TEMP[11].xyzz, TEMP[11].xyzz > 49: RSQ TEMP[12].x, TEMP[12].xxxx > 50: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz > 51: RSQ TEMP[2].x, TEMP[2].xxxx > 52: MUL TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 53: MOV TEMP[2].xy, TEMP[0].xyyy > 54: TEX TEMP[2], TEMP[2], SAMP[5], 2D > 55: MOV TEMP[12], TEMP[2].wxyz > 56: MUL TEMP[6].xyz, TEMP[6].wwww, CONST[2][18].xyzz > 57: MUL TEMP[13].x, TEMP[2].xxxx, TEMP[2].xxxx > 58: MOV TEMP[14].xy, TEMP[0].xyyy > 59: MOV TEMP[14].w, IMM[0].xxxx > 60: TXL TEMP[14].xy, TEMP[14], SAMP[6], 2D > 61: FMA TEMP[9].xyz, IN[2].xyzz, TEMP[9].xxxx, TEMP[1].xyzz > 62: DP3 TEMP[15].x, TEMP[9].xyzz, TEMP[9].xyzz > 63: RSQ TEMP[15].x, TEMP[15].xxxx > 64: MUL TEMP[9].xyz, TEMP[15].xxxx, TEMP[9].xyzz > 65: DP3 TEMP[15].x, TEMP[11].xyzz, TEMP[9].xyzz > 66: MOV_SAT TEMP[15].x, TEMP[15].xxxx > 67: FMA TEMP[13].xy, TEMP[13].xxxx, IMM[4].xxxx, IMM[4].yzzz > 68: MUL TEMP[16].x, TEMP[13].yyyy, IMM[4].wwww > 69: LG2 TEMP[15].x, TEMP[15].xxxx > 70: MUL TEMP[13].x, TEMP[15].xxxx, TEMP[13].xxxx > 71: EX2 TEMP[13].x, TEMP[13].xxxx > 72: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[16].xxxx > 73: ADD TEMP[15].x, -TEMP[2].zzzz, IMM[5].xxxx > 74: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[9].xyzz > 75: MOV_SAT TEMP[9].x, TEMP[9].xxxx > 76: ADD TEMP[9].x, -TEMP[9].xxxx, IMM[5].xxxx > 77: MUL TEMP[16].x, TEMP[9].xxxx, TEMP[9].xxxx > 78: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[16].xxxx > 79: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[16].xxxx > 80: FMA TEMP[9].x, TEMP[15].xxxx, TEMP[9].xxxx, TEMP[2].zzzz > 81: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[13].xxxx > 82: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[14].xxxx > 83: MUL TEMP[6].xyz, TEMP[2].yyyy, TEMP[6].xyzz > 84: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[1].xyzz > 85: MOV TEMP[0].x, TEMP[11].xxxx > 86: ADD TEMP[13].x, TEMP[7].wwww, TEMP[11].xxxx > 87: ADD TEMP[13].x, TEMP[13].xxxx, IMM[0].wwww > 88: FSNE TEMP[15].x, TEMP[7].wwww, IMM[0].xxxx > 89: UIF TEMP[15].xxxx :0 > 90: RCP TEMP[15].x, TEMP[7].wwww > 91: MUL TEMP[15].x, TEMP[13].xxxx, TEMP[15].xxxx > 92: ELSE :0 > 93: SSG TEMP[13].x, TEMP[13].xxxx > 94: MUL TEMP[15].x, IMM[0].yyyy, TEMP[13].xxxx > 95: ENDIF > 96: MOV_SAT TEMP[13].x, TEMP[15].xxxx > 97: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[8].xyzz > 98: MUL TEMP[1].xyz, TEMP[13].xxxx, TEMP[9].xyzz > 99: FSLT TEMP[9].x, IMM[0].xxxx, TEMP[2].wwww >100: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >101: INEG TEMP[9].x, TEMP[9].xxxx >102: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz >103: UIF TEMP[9].xxxx :0 >104: FSLT TEMP[9].x, TEMP[10].wwww, IMM[5].yyyy >105: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >106: INEG TEMP[9].x, TEMP[9].xxxx >107: ADD TEMP[10].xyz, TEMP[8].xyzz, TEMP[8].xyzz >108: MOV TEMP[12].yzw, TEMP[10].yxyz >109: ADD TEMP[13].x, TEMP[2].wwww, IMM[5].zzzz >110: MOV_SAT TEMP[13].x, TEMP[13].xxxx >111: MUL TEMP[8].xyz, TEMP[13].xxxx, TEMP[10].xyzz >112: ADD TEMP[10].xy, -TEMP[11].xxxx, IMM[5].wxxx >113: MOV_SAT TEMP[10].xy, TEMP[10].xyyy >114: FMA TEMP[8].xyz, TEMP[8].xyzz, TEMP[10].xxxx, TEMP[1].xyzz >115: MIN TEMP[6].x, TEMP[2].wwww, IMM[7].xxxx >116: MAX TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >117: MAX TEMP[2].x, TEMP[2].xxxx, TEMP[7].xxxx >118: FSEQ TEMP[13].xyz, TEMP[2].xxxx, IMM[0].xxxx >119: SSG TEMP[15].xyz, TEMP[7].xyzz >120: MUL TEMP[15].xyz, IMM[0].yyyy, TEMP[15].xyzz >121: RCP TEMP[2].xyz, 
TEMP[2].xxxx >122: MUL TEMP[2].xyz, TEMP[7].xyzz, TEMP[2].xyzz >123: UCMP TEMP[2].xyz, TEMP[13].xyzz, TEMP[15].xyzz, TEMP[2].xyzz >124: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz >125: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[2].xyzz >126: MOV_SAT TEMP[2].xyz, TEMP[3].xyzz >127: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[5].xyzz >128: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[7].yyyy >129: MOV TEMP[6].yzw, TEMP[2].yxyz >130: USNE TEMP[2].x, TEMP[9].xxxx, IMM[1].zzzz >131: UIF TEMP[2].xxxx :0 >132: MOV TEMP[2].x, TEMP[8].xxxx >133: ELSE :0 >134: MOV TEMP[2].x, TEMP[1].xxxx >135: ENDIF >136: MOV TEMP[2].x, TEMP[2].xxxx >137: USNE TEMP[3].x, TEMP[9].xxxx, IMM[1].zzzz >138: UIF TEMP[3].xxxx :0 >139: MOV TEMP[3].x, TEMP[8].yyyy >140: ELSE :0 >141: MOV TEMP[3].x, TEMP[1].yyyy >142: ENDIF >143: MOV TEMP[2].y, TEMP[3].xxxx >144: USNE TEMP[3].x, TEMP[9].xxxx, IMM[1].zzzz >145: UIF TEMP[3].xxxx :0 >146: MOV TEMP[3].x, TEMP[8].zzzz >147: ELSE :0 >148: MOV TEMP[3].x, TEMP[1].zzzz >149: ENDIF >150: MOV TEMP[2].z, TEMP[3].xxxx >151: USNE TEMP[3].x, TEMP[9].xxxx, IMM[1].zzzz >152: UIF TEMP[3].xxxx :0 >153: MOV TEMP[3].x, TEMP[12].xxxx >154: ELSE :0 >155: MOV TEMP[3].x, TEMP[6].xxxx >156: ENDIF >157: MOV TEMP[3].x, TEMP[3].xxxx >158: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >159: UIF TEMP[7].xxxx :0 >160: MOV TEMP[7].x, TEMP[12].yyyy >161: ELSE :0 >162: MOV TEMP[7].x, TEMP[6].yyyy >163: ENDIF >164: MOV TEMP[3].y, TEMP[7].xxxx >165: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >166: UIF TEMP[7].xxxx :0 >167: MOV TEMP[7].x, TEMP[12].zzzz >168: ELSE :0 >169: MOV TEMP[7].x, TEMP[6].zzzz >170: ENDIF >171: MOV TEMP[3].z, TEMP[7].xxxx >172: USNE TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz >173: UIF TEMP[7].xxxx :0 >174: MOV TEMP[7].x, TEMP[12].wwww >175: ELSE :0 >176: MOV TEMP[7].x, TEMP[6].wwww >177: ENDIF >178: MOV TEMP[3].w, TEMP[7].xxxx >179: MIN TEMP[6].x, TEMP[3].xxxx, IMM[7].xxxx >180: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[3].yzww >181: MUL TEMP[5].xyz, TEMP[10].yyyy, TEMP[5].xyzz >182: ADD TEMP[3].x, TEMP[11].xxxx, IMM[5].wwww >183: MOV_SAT TEMP[0].x, TEMP[3].xxxx >184: FMA TEMP[1].xyz, TEMP[5].xyzz, TEMP[0].xxxx, TEMP[2].xyzz >185: ENDIF >186: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[4].xyzz >187: MUL TEMP[0].xyz, TEMP[14].yyyy, TEMP[0].xyzz >188: MOV TEMP[0].w, IMM[5].xxxx >189: MOV OUT[0], TEMP[0] >190: END >radeonsi: Compiling shader 84 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> 
%24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 256) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %52 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %53 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %54 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %55 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 > %57 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %58 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %57, i64 0, i64 3 > %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 > %60 = extractelement <8 x i32> %56, i32 7 > %61 = extractelement <4 x i32> %59, i32 0 > %62 = and i32 %61, %60 > %63 = insertelement <4 x i32> %59, i32 %62, i32 0 > %64 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 > %66 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %67 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %66, i64 0, i64 7 > %68 = load <4 x i32>, <4 x i32> addrspace(2)* %67, align 16, !tbaa !0 > %69 = extractelement <8 x i32> %65, i32 7 > %70 = extractelement <4 x i32> %68, i32 0 > %71 = and i32 %70, %69 > %72 = insertelement <4 x i32> %68, i32 %71, i32 0 > %73 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, align 32, !tbaa !0 > %75 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %76 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %75, i64 0, i64 11 > %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 > %78 = extractelement <8 x i32> %74, i32 7 > %79 = extractelement <4 x i32> %77, i32 0 > %80 = and i32 %79, %78 > %81 = insertelement <4 x i32> %77, i32 %80, i32 0 > %82 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0 > %84 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %85 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %84, i64 0, i64 15 > %86 = load <4 x i32>, <4 x i32> 
addrspace(2)* %85, align 16, !tbaa !0 > %87 = extractelement <8 x i32> %83, i32 7 > %88 = extractelement <4 x i32> %86, i32 0 > %89 = and i32 %88, %87 > %90 = insertelement <4 x i32> %86, i32 %89, i32 0 > %91 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0 > %93 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %94 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %93, i64 0, i64 19 > %95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0 > %96 = extractelement <8 x i32> %92, i32 7 > %97 = extractelement <4 x i32> %95, i32 0 > %98 = and i32 %97, %96 > %99 = insertelement <4 x i32> %95, i32 %98, i32 0 > %100 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %101 = load <8 x i32>, <8 x i32> addrspace(2)* %100, align 32, !tbaa !0 > %102 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %103 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %102, i64 0, i64 23 > %104 = load <4 x i32>, <4 x i32> addrspace(2)* %103, align 16, !tbaa !0 > %105 = extractelement <8 x i32> %101, i32 7 > %106 = extractelement <4 x i32> %104, i32 0 > %107 = and i32 %106, %105 > %108 = insertelement <4 x i32> %104, i32 %107, i32 0 > %109 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %110 = load <8 x i32>, <8 x i32> addrspace(2)* %109, align 32, !tbaa !0 > %111 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %112 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %111, i64 0, i64 27 > %113 = load <4 x i32>, <4 x i32> addrspace(2)* %112, align 16, !tbaa !0 > %114 = extractelement <8 x i32> %110, i32 7 > %115 = extractelement <4 x i32> %113, i32 0 > %116 = and i32 %115, %114 > %117 = insertelement <4 x i32> %113, i32 %116, i32 0 > %118 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %119 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %120 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %121 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %122 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %123 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %124 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %125 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %126 = fcmp oeq float %120, 0.000000e+00 > %127 = fcmp oeq float %120, 0.000000e+00 > %128 = fcmp ogt float %118, 0.000000e+00 > %129 = select i1 %128, float 1.000000e+00, float %118 > %130 = fcmp oge float %129, 0.000000e+00 > %131 = fcmp ogt float %119, 0.000000e+00 > %132 = select i1 %131, float 1.000000e+00, float %119 > %133 = fcmp oge float %132, 0.000000e+00 > %.op = fmul float %129, 0x4600000000000000 > %134 = select i1 %130, float %.op, float 0xC600000000000000 > %.op95 = fmul float %132, 0x4600000000000000 > %135 = select i1 %133, float %.op95, float 0xC600000000000000 > %136 = fdiv float 1.000000e+00, %120 > %137 = fmul float %118, %136 > %138 = fmul float %119, %136 > %139 = select i1 %126, float %134, float %137 > %140 = select i1 %127, float %135, float %138 > %141 = fcmp oeq float %120, 0.000000e+00 > %142 = fcmp oeq float %120, 0.000000e+00 > %143 = fcmp ogt float %121, 0.000000e+00 > %144 = select i1 %143, float 1.000000e+00, float %121 > %145 = fcmp oge float %144, 
0.000000e+00 > %146 = fcmp ogt float %122, 0.000000e+00 > %147 = select i1 %146, float 1.000000e+00, float %122 > %148 = fcmp oge float %147, 0.000000e+00 > %.op96 = fmul float %144, 0x4600000000000000 > %149 = select i1 %145, float %.op96, float 0xC600000000000000 > %.op97 = fmul float %147, 0x4600000000000000 > %150 = select i1 %148, float %.op97, float 0xC600000000000000 > %151 = fdiv float 1.000000e+00, %120 > %152 = fmul float %121, %151 > %153 = fmul float %122, %151 > %154 = select i1 %141, float %149, float %152 > %155 = select i1 %142, float %150, float %153 > %156 = bitcast float %139 to i32 > %157 = bitcast float %140 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %56, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> %160, i32 0 > %162 = fmul float %154, %161 > %163 = fmul float %155, %161 > %164 = fsub float %49, %162 > %165 = fsub float %50, %163 > %166 = fsub float %51, %161 > %167 = fmul float %164, %36 > %168 = fmul float %165, %36 > %169 = fmul float %166, %36 > %170 = fmul float %25, %167 > %171 = fmul float %26, %168 > %172 = fadd float %171, %170 > %173 = fmul float %27, %169 > %174 = fadd float %172, %173 > %175 = fmul float %28, %167 > %176 = fmul float %29, %168 > %177 = fadd float %176, %175 > %178 = fmul float %30, %169 > %179 = fadd float %177, %178 > %180 = fmul float %31, %167 > %181 = fmul float %32, %168 > %182 = fadd float %181, %180 > %183 = fmul float %33, %169 > %184 = fadd float %182, %183 > %185 = fmul float %174, %174 > %186 = fmul float %179, %179 > %187 = fadd float %186, %185 > %188 = fmul float %184, %184 > %189 = fadd float %187, %188 > %190 = call float @llvm.sqrt.f32(float %189) > %191 = bitcast float %190 to i32 > %192 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %191, i32 1 > %193 = insertelement <4 x i32> %192, i32 0, i32 2 > %194 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %193, <8 x i32> %65, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %195 = extractelement <4 x float> %194, i32 0 > %196 = extractelement <4 x float> %194, i32 1 > %197 = extractelement <4 x float> %194, i32 2 > %198 = fmul float %37, %167 > %199 = fmul float %38, %168 > %200 = fadd float %199, %198 > %201 = fmul float %39, %169 > %202 = fadd float %200, %201 > %203 = fmul float %40, %167 > %204 = fmul float %41, %168 > %205 = fadd float %204, %203 > %206 = fmul float %42, %169 > %207 = fadd float %205, %206 > %208 = fmul float %43, %167 > %209 = fmul float %44, %168 > %210 = fadd float %209, %208 > %211 = fmul float %45, %169 > %212 = fadd float %210, %211 > %213 = insertelement <4 x float> undef, float %202, i32 0 > %214 = insertelement <4 x float> %213, float %207, i32 1 > %215 = insertelement <4 x float> %214, float %212, i32 2 > %216 = insertelement <4 x float> %215, float 0.000000e+00, i32 3 > %217 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %216) > %218 = extractelement <4 x float> %217, i32 0 > %219 = extractelement <4 x float> %217, i32 1 > %220 = extractelement <4 x float> %217, i32 2 > %221 = call float @llvm.fabs.f32(float %220) > %222 = fdiv float 1.000000e+00, %221 > %223 = fmul float %218, %222 > %224 = fadd float %223, 1.500000e+00 > %225 = fmul float %219, %222 > %226 = fadd float %225, 1.500000e+00 > %227 = bitcast float %226 to i32 > %228 = bitcast float %224 to i32 > %bc = 
bitcast <4 x float> %217 to <4 x i32> > %229 = extractelement <4 x i32> %bc, i32 3 > %230 = insertelement <4 x i32> undef, i32 %227, i32 0 > %231 = insertelement <4 x i32> %230, i32 %228, i32 1 > %232 = insertelement <4 x i32> %231, i32 %229, i32 2 > %233 = insertelement <4 x i32> %232, i32 0, i32 3 > %234 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %233, <8 x i32> %74, <4 x i32> %81, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %235 = extractelement <4 x float> %234, i32 0 > %236 = extractelement <4 x float> %234, i32 1 > %237 = extractelement <4 x float> %234, i32 2 > %238 = extractelement <4 x float> %234, i32 3 > %239 = fmul float %235, %46 > %240 = fmul float %236, %47 > %241 = fmul float %237, %48 > %242 = bitcast float %139 to i32 > %243 = bitcast float %140 to i32 > %244 = insertelement <2 x i32> undef, i32 %242, i32 0 > %245 = insertelement <2 x i32> %244, i32 %243, i32 1 > %246 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %245, <8 x i32> %83, <4 x i32> %90, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %247 = extractelement <4 x float> %246, i32 0 > %248 = extractelement <4 x float> %246, i32 1 > %249 = extractelement <4 x float> %246, i32 2 > %250 = extractelement <4 x float> %246, i32 3 > %251 = fmul float %239, %247 > %252 = fmul float %240, %248 > %253 = fmul float %241, %249 > %254 = fmul float %123, %123 > %255 = fmul float %124, %124 > %256 = fadd float %255, %254 > %257 = fmul float %125, %125 > %258 = fadd float %256, %257 > %259 = call float @llvm.AMDGPU.rsq.clamped.f32(float %258) > %260 = bitcast float %139 to i32 > %261 = bitcast float %140 to i32 > %262 = insertelement <2 x i32> undef, i32 %260, i32 0 > %263 = insertelement <2 x i32> %262, i32 %261, i32 1 > %264 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %263, <8 x i32> %92, <4 x i32> %99, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %265 = extractelement <4 x float> %264, i32 0 > %266 = extractelement <4 x float> %264, i32 1 > %267 = extractelement <4 x float> %264, i32 2 > %268 = extractelement <4 x float> %264, i32 3 > %269 = call float @llvm.fma.f32(float %265, float 2.000000e+00, float -1.000000e+00) > %270 = call float @llvm.fma.f32(float %266, float 2.000000e+00, float -1.000000e+00) > %271 = call float @llvm.fma.f32(float %267, float 2.000000e+00, float -1.000000e+00) > %272 = fmul float %269, %269 > %273 = fmul float %270, %270 > %274 = fadd float %273, %272 > %275 = fmul float %271, %271 > %276 = fadd float %274, %275 > %277 = call float @llvm.AMDGPU.rsq.clamped.f32(float %276) > %278 = fmul float %277, %269 > %279 = fmul float %277, %270 > %280 = fmul float %277, %271 > %281 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) > %282 = fmul float %281, %174 > %283 = fmul float %281, %179 > %284 = fmul float %281, %184 > %285 = bitcast float %139 to i32 > %286 = bitcast float %140 to i32 > %287 = insertelement <2 x i32> undef, i32 %285, i32 0 > %288 = insertelement <2 x i32> %287, i32 %286, i32 1 > %289 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %288, <8 x i32> %101, <4 x i32> %108, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %290 = extractelement <4 x float> %289, i32 0 > %291 = extractelement <4 x float> %289, i32 1 > %292 = extractelement <4 x float> %289, i32 2 > %293 = extractelement <4 x float> %289, i32 3 > %294 = fmul float %238, %52 > %295 = fmul float %238, %53 > %296 = fmul float %238, %54 > %297 = fmul float %290, %290 > %298 = bitcast float %139 to i32 > %299 = bitcast float %140 to 
i32 > %300 = insertelement <4 x i32> undef, i32 %298, i32 0 > %301 = insertelement <4 x i32> %300, i32 %299, i32 1 > %302 = insertelement <4 x i32> %301, i32 0, i32 2 > %303 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %302, <8 x i32> %110, <4 x i32> %117, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %304 = extractelement <4 x float> %303, i32 0 > %305 = extractelement <4 x float> %303, i32 1 > %306 = call float @llvm.fma.f32(float %123, float %259, float %282) > %307 = call float @llvm.fma.f32(float %124, float %259, float %283) > %308 = call float @llvm.fma.f32(float %125, float %259, float %284) > %309 = fmul float %306, %306 > %310 = fmul float %307, %307 > %311 = fadd float %310, %309 > %312 = fmul float %308, %308 > %313 = fadd float %311, %312 > %314 = call float @llvm.AMDGPU.rsq.clamped.f32(float %313) > %315 = fmul float %314, %306 > %316 = fmul float %314, %307 > %317 = fmul float %314, %308 > %318 = fmul float %278, %315 > %319 = fmul float %279, %316 > %320 = fadd float %319, %318 > %321 = fmul float %280, %317 > %322 = fadd float %320, %321 > %323 = call float @llvm.AMDGPU.clamp.(float %322, float 0.000000e+00, float 1.000000e+00) > %324 = call float @llvm.fma.f32(float %297, float 4.096000e+03, float 0x3F70624DE0000000) > %325 = call float @llvm.fma.f32(float %297, float 4.096000e+03, float 0x4000083120000000) > %326 = fmul float %325, 1.250000e-01 > %327 = call float @llvm.log2.f32(float %323) > %328 = fmul float %327, %324 > %329 = call float @llvm.exp2.f32(float %328) > %330 = fmul float %329, %326 > %331 = fsub float 1.000000e+00, %292 > %332 = fmul float %282, %315 > %333 = fmul float %283, %316 > %334 = fadd float %333, %332 > %335 = fmul float %284, %317 > %336 = fadd float %334, %335 > %337 = call float @llvm.AMDGPU.clamp.(float %336, float 0.000000e+00, float 1.000000e+00) > %338 = fsub float 1.000000e+00, %337 > %339 = fmul float %338, %338 > %340 = fmul float %339, %339 > %341 = fmul float %338, %340 > %342 = call float @llvm.fma.f32(float %331, float %341, float %292) > %343 = fmul float %342, %330 > %344 = fmul float %294, %304 > %345 = fmul float %295, %304 > %346 = fmul float %296, %304 > %347 = fmul float %291, %344 > %348 = fmul float %291, %345 > %349 = fmul float %291, %346 > %350 = fmul float %278, %282 > %351 = fmul float %279, %283 > %352 = fadd float %351, %350 > %353 = fmul float %280, %284 > %354 = fadd float %352, %353 > %355 = fadd float %250, %354 > %356 = fadd float %355, -1.000000e+00 > %357 = fcmp une float %250, 0.000000e+00 > br i1 %357, label %IF, label %ELSE > >IF: ; preds = %main_body > %358 = fdiv float 1.000000e+00, %250 > %359 = fmul float %356, %358 > br label %ENDIF > >ELSE: ; preds = %main_body > %360 = fcmp ogt float %356, 0.000000e+00 > %361 = select i1 %360, float 1.000000e+00, float %356 > %362 = fcmp oge float %361, 0.000000e+00 > %.op98 = fmul float %361, 0x4600000000000000 > %363 = select i1 %362, float %.op98, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp60.0 = phi float [ %359, %IF ], [ %363, %ELSE ] > %364 = call float @llvm.AMDGPU.clamp.(float %temp60.0, float 0.000000e+00, float 1.000000e+00) > %365 = call float @llvm.fma.f32(float %347, float %343, float %251) > %366 = call float @llvm.fma.f32(float %348, float %343, float %252) > %367 = call float @llvm.fma.f32(float %349, float %343, float %253) > %368 = fmul float %364, %365 > %369 = fmul float %364, %366 > %370 = fmul float %364, %367 > %371 = fcmp ogt float %293, 0.000000e+00 > br i1 %371, label %IF69, 
label %ENDIF68 > >IF69: ; preds = %ENDIF > %372 = fcmp olt float %268, 0x3FE0505060000000 > %373 = fadd float %251, %251 > %374 = fadd float %252, %252 > %375 = fadd float %253, %253 > %376 = fadd float %293, -5.000000e-01 > %377 = call float @llvm.AMDGPU.clamp.(float %376, float 0.000000e+00, float 1.000000e+00) > %378 = fmul float %377, %373 > %379 = fmul float %377, %374 > %380 = fmul float %377, %375 > %381 = fsub float 2.500000e-01, %354 > %382 = fsub float 1.000000e+00, %354 > %383 = call float @llvm.AMDGPU.clamp.(float %381, float 0.000000e+00, float 1.000000e+00) > %384 = call float @llvm.AMDGPU.clamp.(float %382, float 0.000000e+00, float 1.000000e+00) > %385 = call float @llvm.fma.f32(float %378, float %383, float %368) > %386 = call float @llvm.fma.f32(float %379, float %383, float %369) > %387 = call float @llvm.fma.f32(float %380, float %383, float %370) > %388 = call float @llvm.minnum.f32(float %293, float 5.000000e-01) > %389 = call float @llvm.maxnum.f32(float %249, float %248) > %390 = call float @llvm.maxnum.f32(float %389, float %247) > %391 = fcmp oeq float %390, 0.000000e+00 > %392 = fcmp oeq float %390, 0.000000e+00 > %393 = fcmp oeq float %390, 0.000000e+00 > %394 = fcmp ogt float %247, 0.000000e+00 > %395 = select i1 %394, float 1.000000e+00, float %247 > %396 = fcmp oge float %395, 0.000000e+00 > %397 = fcmp ogt float %248, 0.000000e+00 > %398 = select i1 %397, float 1.000000e+00, float %248 > %399 = fcmp oge float %398, 0.000000e+00 > %400 = fcmp ogt float %249, 0.000000e+00 > %401 = select i1 %400, float 1.000000e+00, float %249 > %402 = fcmp oge float %401, 0.000000e+00 > %.op99 = fmul float %395, 0x4600000000000000 > %403 = select i1 %396, float %.op99, float 0xC600000000000000 > %.op100 = fmul float %398, 0x4600000000000000 > %404 = select i1 %399, float %.op100, float 0xC600000000000000 > %.op101 = fmul float %401, 0x4600000000000000 > %405 = select i1 %402, float %.op101, float 0xC600000000000000 > %406 = fdiv float 1.000000e+00, %390 > %407 = fmul float %247, %406 > %408 = fmul float %248, %406 > %409 = fmul float %249, %406 > %410 = select i1 %391, float %403, float %407 > %411 = select i1 %392, float %404, float %408 > %412 = select i1 %393, float %405, float %409 > %413 = call float @llvm.AMDGPU.clamp.(float %410, float 0.000000e+00, float 1.000000e+00) > %414 = call float @llvm.AMDGPU.clamp.(float %411, float 0.000000e+00, float 1.000000e+00) > %415 = call float @llvm.AMDGPU.clamp.(float %412, float 0.000000e+00, float 1.000000e+00) > %416 = fmul float %413, %413 > %417 = fmul float %414, %414 > %418 = fmul float %415, %415 > %419 = call float @llvm.AMDGPU.clamp.(float %239, float 0.000000e+00, float 1.000000e+00) > %420 = call float @llvm.AMDGPU.clamp.(float %240, float 0.000000e+00, float 1.000000e+00) > %421 = call float @llvm.AMDGPU.clamp.(float %241, float 0.000000e+00, float 1.000000e+00) > %422 = fmul float %419, %416 > %423 = fmul float %420, %417 > %424 = fmul float %421, %418 > %425 = fmul float %422, 0x3FC3333340000000 > %426 = fmul float %423, 0x3FC3333340000000 > %427 = fmul float %424, 0x3FC3333340000000 > %. 
= select i1 %372, float %385, float %368 > %temp12.0 = select i1 %372, float %386, float %369 > %.92 = select i1 %372, float %387, float %370 > %temp12.2 = select i1 %372, float %293, float %388 > %.93 = select i1 %372, float %373, float %425 > %temp28.1 = select i1 %372, float %374, float %426 > %.94 = select i1 %372, float %375, float %427 > %428 = call float @llvm.minnum.f32(float %temp12.2, float 5.000000e-01) > %429 = fmul float %428, %.93 > %430 = fmul float %428, %temp28.1 > %431 = fmul float %428, %.94 > %432 = fmul float %384, %429 > %433 = fmul float %384, %430 > %434 = fmul float %384, %431 > %435 = fadd float %354, 2.500000e-01 > %436 = call float @llvm.AMDGPU.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) > %437 = call float @llvm.fma.f32(float %432, float %436, float %.) > %438 = call float @llvm.fma.f32(float %433, float %436, float %temp12.0) > %439 = call float @llvm.fma.f32(float %434, float %436, float %.92) > br label %ENDIF68 > >ENDIF68: ; preds = %ENDIF, %IF69 > %temp6.0 = phi float [ %439, %IF69 ], [ %370, %ENDIF ] > %temp5.0 = phi float [ %438, %IF69 ], [ %369, %ENDIF ] > %temp4.0 = phi float [ %437, %IF69 ], [ %368, %ENDIF ] > %440 = fmul float %temp4.0, %195 > %441 = fmul float %temp5.0, %196 > %442 = fmul float %temp6.0, %197 > %443 = fmul float %305, %440 > %444 = fmul float %305, %441 > %445 = fmul float %305, %442 > %446 = bitcast float %5 to i32 > %447 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %446, 10 > %448 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %447, float %443, 11 > %449 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %448, float %444, 12 > %450 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %449, float %445, 13 > %451 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %450, float 1.000000e+00, 14 > %452 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %451, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %452 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) 
#1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..16] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.2500} >IMM[1] UINT32 {0, 240, 224, 208} >IMM[2] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000} >IMM[3] UINT32 {256, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, CONST[1][15].xyyy, IMM[0].yzzz > 3: FMA TEMP[2].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][15].xyyy > 4: MUL TEMP[3].xy, TEMP[2].xyyy, CONST[1][14].zwww > 5: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[0].wwww > 6: MOV TEMP[3].zw, TEMP[3].yyxy > 7: MOV TEMP[3].xy, TEMP[2].xyxx > 8: FMA TEMP[2].xy, TEMP[2].xyyy, CONST[1][13].zwww, CONST[1][13].xyyy > 9: ADD TEMP[4].xy, IN[0].xyyy, IMM[0].yyyy > 10: MUL TEMP[5].x, CONST[1][13].zzzz, TEMP[4].xxxx > 11: MOV TEMP[1].x, IMM[2].xxxx > 12: MOV TEMP[1].y, CONST[1][13].wwww > 13: MOV TEMP[6].y, IMM[2].xyxx > 14: FMA TEMP[4].x, -TEMP[4].yyyy, IMM[2].xxxx, IMM[0].yyyy > 15: MOV TEMP[5].z, TEMP[4].xxxx > 16: FMA TEMP[1].xy, TEMP[5].xzzz, TEMP[1].xyyy, CONST[1][13].xyyy > 17: MOV TEMP[6].x, CONST[1][16].yyyy > 18: DP2 TEMP[4].x, TEMP[6].xyyy, CONST[1][14].xyyy > 19: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[4].xxxx > 20: MOV TEMP[1].z, CONST[1][16].yyyy > 21: MOV OUT[2], TEMP[1] > 22: MOV OUT[3], TEMP[2] > 23: MOV OUT[1], TEMP[3] > 24: MOV OUT[0], TEMP[0] > 25: END >radeonsi: Compiling shader 85 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 220) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 236) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 240) > %25 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 244) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 260) > %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 > %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %13) > %30 = extractelement <4 x float> %29, i32 0 > %31 = extractelement <4 x float> %29, i32 1 > %32 = fsub float -0.000000e+00, %25 > %33 = call float @llvm.fma.f32(float %30, float %24, float %24) > %34 = call float @llvm.fma.f32(float %31, float %32, float %25) > %35 = fmul float %33, %22 > %36 = fmul float %34, %23 > %37 = fmul float %35, 2.500000e-01 > %38 = fmul float %36, 2.500000e-01 > %39 = call float @llvm.fma.f32(float %33, float %18, float %16) > %40 = call float @llvm.fma.f32(float %34, float %19, float %17) > %41 = fadd float %30, 1.000000e+00 > %42 = fadd float %31, 1.000000e+00 > %43 = fmul float %18, %41 > %44 = fsub float -0.000000e+00, %42 > %45 = call float @llvm.fma.f32(float %44, float 5.000000e-01, float 1.000000e+00) > %46 = call float @llvm.fma.f32(float %43, float 5.000000e-01, float %16) > %47 = call float @llvm.fma.f32(float %45, float %19, float %17) > %48 = fmul float %26, %20 > %49 = fadd float %48, %21 > %50 = fmul float %46, %49 > %51 = fmul float %47, %49 > %52 = bitcast i32 %11 to float > %53 = insertvalue <{ float, float, float }> undef, float %52, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %37, float %38) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %50, float %51, float %26, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %39, float %40, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %30, float %31, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %53 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..14] >DCL TEMP[0..30], LOCAL >IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.1000} >IMM[1] UINT32 {0, 176, 160, 192} >IMM[2] FLT32 { -0.2700, 0.0100, 0.0040, 0.0000} >IMM[3] FLT32 { 0.5000, 0.2500, 1.3000, 0.6500} >IMM[4] UINT32 {224, 96, 112, 144} >IMM[5] FLT32 {158456325028528675187087900672.0000, 0.5000, -0.5000, -0.3800} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.3100, -0.3100, -0.0100, 0.0000} >IMM[8] FLT32 { 2.3000, 1.1500, 3.2000, 1.6000} >IMM[9] FLT32 { 0.0500, -0.3800, -0.4000, 0.3500} >IMM[10] FLT32 { 4.1000, 2.0500, 5.3000, 2.6500} >IMM[11] 
FLT32 { 6.1000, 3.0500, 7.7000, 3.8500} >IMM[12] FLT32 { 0.7100, 0.3400, 0.1000, -0.1500} >IMM[13] FLT32 { 8.5000, 4.2500, 10.0000, 5.0000} >IMM[14] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].w, IMM[0].xxxx > 1: DP3 TEMP[1].x, IN[1].xyzz, IN[1].xyzz > 2: RSQ TEMP[2].x, TEMP[1].xxxx > 3: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[1].xyzz > 4: MOV TEMP[2].xy, IN[0].xyyy > 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 6: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[0].yyyy, IMM[0].zzzz > 7: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz > 8: RSQ TEMP[4].x, TEMP[4].xxxx > 9: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 10: MUL TEMP[3].xyz, TEMP[2].yyyy, CONST[1][11].xyzz > 11: FMA TEMP[4].xyz, TEMP[2].xxxx, CONST[1][10].xyzz, TEMP[3].xyzz > 12: FMA TEMP[4].xyz, TEMP[2].zzzz, CONST[1][12].xyzz, TEMP[4].xyzz > 13: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 14: RSQ TEMP[5].x, TEMP[5].xxxx > 15: MUL TEMP[2].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 16: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[2].xyzz > 17: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx > 18: FMA TEMP[1].xyz, TEMP[2].xyzz, -TEMP[4].xxxx, TEMP[1].xyzz > 19: FMA TEMP[4].xyz, TEMP[1].xyzz, IMM[0].yyyy, TEMP[2].xyzz > 20: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 21: RSQ TEMP[5].x, TEMP[5].xxxx > 22: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 23: MOV TEMP[4].xy, IN[0].zwww > 24: TEX TEMP[4].xy, TEMP[4], SAMP[1], 2D > 25: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[0].yyyy, IMM[0].zzzz > 26: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy > 27: RSQ TEMP[5].x, TEMP[5].xxxx > 28: MUL TEMP[3].xy, TEMP[5].xxxx, TEMP[4].xyyy > 29: MOV TEMP[3].zw, -TEMP[3].xxxx > 30: DP2 TEMP[4].x, TEMP[3].zyyy, IMM[0].wwww > 31: DP2 TEMP[5].x, TEMP[3].wyyy, IMM[2].xyyy > 32: MUL TEMP[6].xyz, TEMP[2].yzxx, TEMP[1].zxyy > 33: FMA TEMP[7].xyz, TEMP[1].yzxx, TEMP[2].zxyy, -TEMP[6].xyzz > 34: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz > 35: RSQ TEMP[8].x, TEMP[8].xxxx > 36: MUL TEMP[6].xyz, TEMP[8].xxxx, TEMP[7].xyzz > 37: MUL TEMP[7].xyz, TEMP[1].yzxx, TEMP[6].zxyy > 38: FMA TEMP[7].xyz, TEMP[6].yzxx, TEMP[1].zxyy, -TEMP[7].xyzz > 39: FMA TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx, TEMP[1].xyzz > 40: DP2 TEMP[8].x, TEMP[3].yxxx, IMM[0].wwww > 41: FMA TEMP[4].xyz, TEMP[6].xyzz, TEMP[8].xxxx, TEMP[4].xyzz > 42: MOV TEMP[9].xy, IN[0].xyyy > 43: TEX TEMP[9].x, TEMP[9], SAMP[2], 2D > 44: MOV TEMP[10].z, TEMP[9].xxxx > 45: MUL TEMP[9].xy, TEMP[9].xxxx, IMM[2].zyyy > 46: MAX TEMP[9].xy, TEMP[9].xyyy, IMM[0].xxxx > 47: MUL TEMP[11], TEMP[9].xxxx, IMM[3] > 48: MOV TEMP[10].w, IMM[0].xxxx > 49: DP2 TEMP[12].x, TEMP[10].zwww, CONST[1][14].xyyy > 50: MUL TEMP[10].xy, TEMP[12].xxxx, IN[2].xyyy > 51: FMA TEMP[12].xyz, TEMP[4].xyzz, TEMP[11].xxxx, TEMP[10].xyzz > 52: MOV TEMP[0].xyz, TEMP[12].xyzx > 53: DP4 TEMP[4].x, CONST[1][6], TEMP[0] > 54: DP4 TEMP[13].x, CONST[1][7], TEMP[0] > 55: MOV TEMP[4].y, TEMP[13].xxxx > 56: DP4 TEMP[13].x, CONST[1][9], TEMP[0] > 57: FSEQ TEMP[14].xy, TEMP[13].xxxx, IMM[2].wwww > 58: SSG TEMP[15].xy, TEMP[4].xyyy > 59: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy > 60: RCP TEMP[13].xy, TEMP[13].xxxx > 61: MUL TEMP[13].xy, TEMP[4].xyyy, TEMP[13].xyyy > 62: UCMP TEMP[0].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[13].xyyy > 63: FMA TEMP[0].xy, TEMP[0].xyyy, IMM[5].yzzz, IMM[3].xxxx > 64: MOV TEMP[13].xy, TEMP[0].xyyy > 65: TEX TEMP[13].x, TEMP[13], SAMP[3], 2D > 66: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].zzzz > 67: FSLT TEMP[12].x, TEMP[0].xxxx, IMM[2].wwww > 68: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx > 69: INEG TEMP[12].x, TEMP[12].xxxx > 70: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx > 71: 
UIF TEMP[12].xxxx :0 > 72: MOV TEMP[12].x, TEMP[11].yyyy > 73: ELSE :0 > 74: MOV TEMP[12].x, TEMP[11].xxxx > 75: ENDIF > 76: MUL TEMP[13].x, TEMP[12].xxxx, IMM[3].xxxx > 77: FSLT TEMP[14].x, IMM[2].wwww, TEMP[1].zzzz > 78: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx > 79: INEG TEMP[14].x, TEMP[14].xxxx > 80: FSLT TEMP[15].x, IMM[2].wwww, TEMP[0].xxxx > 81: AND TEMP[15].x, TEMP[15].xxxx, IMM[6].xxxx > 82: INEG TEMP[15].x, TEMP[15].xxxx > 83: AND TEMP[15].x, TEMP[14].xxxx, TEMP[15].xxxx > 84: USNE TEMP[15].x, TEMP[15].xxxx, IMM[1].xxxx > 85: UIF TEMP[15].xxxx :0 > 86: MOV TEMP[13].x, TEMP[13].xxxx > 87: ELSE :0 > 88: MOV TEMP[13].x, TEMP[12].xxxx > 89: ENDIF > 90: ABS TEMP[12].x, TEMP[0].xxxx > 91: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].xxxx > 92: FSNE TEMP[12].x, TEMP[13].xxxx, IMM[2].wwww > 93: UIF TEMP[12].xxxx :0 > 94: RCP TEMP[12].x, TEMP[13].xxxx > 95: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 96: ELSE :0 > 97: SSG TEMP[13].x, TEMP[0].xxxx > 98: MUL TEMP[12].x, IMM[5].xxxx, TEMP[13].xxxx > 99: ENDIF >100: MOV_SAT TEMP[12].x, TEMP[12].xxxx >101: MOV TEMP[4].w, IMM[0].xxxx >102: MUL TEMP[13].xyz, TEMP[3].yxyy, IMM[7].xyxx >103: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[13].yyyy, TEMP[1].xyzz >104: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[13].xxxx, TEMP[15].xyzz >105: FMA TEMP[16].xyz, TEMP[15].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >106: MOV TEMP[4].xyz, TEMP[16].xyzx >107: DP4 TEMP[11].x, CONST[1][6], TEMP[4] >108: DP4 TEMP[17].x, CONST[1][7], TEMP[4] >109: MOV TEMP[11].y, TEMP[17].xxxx >110: DP4 TEMP[17].x, CONST[1][9], TEMP[4] >111: FSEQ TEMP[18].xy, TEMP[17].xxxx, IMM[2].wwww >112: SSG TEMP[19].xy, TEMP[11].xyyy >113: MUL TEMP[19].xy, IMM[5].xxxx, TEMP[19].xyyy >114: RCP TEMP[17].xy, TEMP[17].xxxx >115: MUL TEMP[17].xy, TEMP[11].xyyy, TEMP[17].xyyy >116: UCMP TEMP[17].xy, TEMP[18].xyyy, TEMP[19].xyyy, TEMP[17].xyyy >117: FMA TEMP[17].xy, TEMP[17].xyyy, IMM[5].yzzz, IMM[3].xxxx >118: MOV TEMP[17].xy, TEMP[17].xyyy >119: TEX TEMP[17].x, TEMP[17], SAMP[3], 2D >120: ADD TEMP[16].x, -TEMP[17].xxxx, TEMP[16].zzzz >121: FSLT TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >122: AND TEMP[17].x, TEMP[17].xxxx, IMM[6].xxxx >123: INEG TEMP[17].x, TEMP[17].xxxx >124: USNE TEMP[17].x, TEMP[17].xxxx, IMM[1].xxxx >125: UIF TEMP[17].xxxx :0 >126: MOV TEMP[17].x, TEMP[11].wwww >127: ELSE :0 >128: MOV TEMP[17].x, TEMP[11].zzzz >129: ENDIF >130: MUL TEMP[18].x, TEMP[17].xxxx, IMM[3].xxxx >131: FSLT TEMP[19].x, IMM[2].wwww, TEMP[16].xxxx >132: AND TEMP[19].x, TEMP[19].xxxx, IMM[6].xxxx >133: INEG TEMP[19].x, TEMP[19].xxxx >134: AND TEMP[19].x, TEMP[14].xxxx, TEMP[19].xxxx >135: USNE TEMP[19].x, TEMP[19].xxxx, IMM[1].xxxx >136: UIF TEMP[19].xxxx :0 >137: MOV TEMP[18].x, TEMP[18].xxxx >138: ELSE :0 >139: MOV TEMP[18].x, TEMP[17].xxxx >140: ENDIF >141: ABS TEMP[16].x, TEMP[16].xxxx >142: ADD TEMP[16].x, -TEMP[18].xxxx, TEMP[16].xxxx >143: FSNE TEMP[17].x, TEMP[18].xxxx, IMM[2].wwww >144: UIF TEMP[17].xxxx :0 >145: RCP TEMP[17].x, TEMP[18].xxxx >146: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[17].xxxx >147: ELSE :0 >148: SSG TEMP[16].x, TEMP[16].xxxx >149: MUL TEMP[17].x, IMM[5].xxxx, TEMP[16].xxxx >150: ENDIF >151: MOV_SAT TEMP[16].x, TEMP[17].xxxx >152: ADD TEMP[0].x, TEMP[16].xxxx, TEMP[12].xxxx >153: MOV TEMP[4].w, IMM[0].xxxx >154: DP2 TEMP[12].x, TEMP[3].yxxx, IMM[2].xyyy >155: FMA TEMP[11].xyz, TEMP[7].xyzz, TEMP[5].xxxx, TEMP[1].xyzz >156: FMA TEMP[11].xyz, TEMP[6].xyzz, TEMP[12].xxxx, TEMP[11].xyzz >157: MUL TEMP[15], TEMP[9].xxxx, IMM[8] >158: FMA TEMP[5].xyz, TEMP[11].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >159: MOV 
TEMP[4].xyz, TEMP[5].xyzx >160: DP4 TEMP[11].x, CONST[1][6], TEMP[4] >161: DP4 TEMP[16].x, CONST[1][7], TEMP[4] >162: MOV TEMP[11].y, TEMP[16].xxxx >163: DP4 TEMP[16].x, CONST[1][9], TEMP[4] >164: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >165: SSG TEMP[18].xy, TEMP[11].xyyy >166: MUL TEMP[18].xy, IMM[5].xxxx, TEMP[18].xyyy >167: RCP TEMP[16].xy, TEMP[16].xxxx >168: MUL TEMP[16].xy, TEMP[11].xyyy, TEMP[16].xyyy >169: UCMP TEMP[4].xy, TEMP[17].xyyy, TEMP[18].xyyy, TEMP[16].xyyy >170: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >171: MOV TEMP[16].xy, TEMP[4].xyyy >172: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >173: ADD TEMP[5].x, -TEMP[16].xxxx, TEMP[5].zzzz >174: FSLT TEMP[16].x, TEMP[5].xxxx, IMM[2].wwww >175: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >176: INEG TEMP[16].x, TEMP[16].xxxx >177: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >178: UIF TEMP[16].xxxx :0 >179: MOV TEMP[16].x, TEMP[15].yyyy >180: ELSE :0 >181: MOV TEMP[16].x, TEMP[15].xxxx >182: ENDIF >183: MUL TEMP[17].x, TEMP[16].xxxx, IMM[3].xxxx >184: FSLT TEMP[18].x, IMM[2].wwww, TEMP[5].xxxx >185: AND TEMP[18].x, TEMP[18].xxxx, IMM[6].xxxx >186: INEG TEMP[18].x, TEMP[18].xxxx >187: AND TEMP[18].x, TEMP[14].xxxx, TEMP[18].xxxx >188: USNE TEMP[18].x, TEMP[18].xxxx, IMM[1].xxxx >189: UIF TEMP[18].xxxx :0 >190: MOV TEMP[17].x, TEMP[17].xxxx >191: ELSE :0 >192: MOV TEMP[17].x, TEMP[16].xxxx >193: ENDIF >194: ABS TEMP[5].x, TEMP[5].xxxx >195: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[17].xxxx >196: FSNE TEMP[16].x, TEMP[17].xxxx, IMM[2].wwww >197: UIF TEMP[16].xxxx :0 >198: RCP TEMP[16].x, TEMP[17].xxxx >199: MUL TEMP[16].x, TEMP[5].xxxx, TEMP[16].xxxx >200: ELSE :0 >201: SSG TEMP[5].x, TEMP[5].xxxx >202: MUL TEMP[16].x, IMM[5].xxxx, TEMP[5].xxxx >203: ENDIF >204: MOV_SAT TEMP[5].x, TEMP[16].xxxx >205: ADD TEMP[0].x, TEMP[5].xxxx, TEMP[0].xxxx >206: MOV TEMP[4].w, IMM[0].xxxx >207: DP2 TEMP[5].x, TEMP[3].yxxx, IMM[7].zyyy >208: MUL TEMP[11].xy, TEMP[3].xyyy, IMM[0].zxxx >209: DP2 TEMP[16].x, TEMP[11].xyyy, IMM[7].zyyy >210: FMA TEMP[17].xyz, TEMP[7].xyzz, TEMP[16].xxxx, TEMP[1].xyzz >211: FMA TEMP[17].xyz, TEMP[6].xyzz, TEMP[5].xxxx, TEMP[17].xyzz >212: FMA TEMP[18].xyz, TEMP[17].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >213: MOV TEMP[4].xyz, TEMP[18].xyzx >214: DP4 TEMP[15].x, CONST[1][6], TEMP[4] >215: DP4 TEMP[19].x, CONST[1][7], TEMP[4] >216: MOV TEMP[15].y, TEMP[19].xxxx >217: DP4 TEMP[19].x, CONST[1][9], TEMP[4] >218: FSEQ TEMP[20].xy, TEMP[19].xxxx, IMM[2].wwww >219: SSG TEMP[21].xy, TEMP[15].xyyy >220: MUL TEMP[21].xy, IMM[5].xxxx, TEMP[21].xyyy >221: RCP TEMP[19].xy, TEMP[19].xxxx >222: MUL TEMP[19].xy, TEMP[15].xyyy, TEMP[19].xyyy >223: UCMP TEMP[4].xy, TEMP[20].xyyy, TEMP[21].xyyy, TEMP[19].xyyy >224: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >225: MOV TEMP[19].xy, TEMP[4].xyyy >226: TEX TEMP[19].x, TEMP[19], SAMP[3], 2D >227: ADD TEMP[18].x, -TEMP[19].xxxx, TEMP[18].zzzz >228: FSLT TEMP[19].x, TEMP[18].xxxx, IMM[2].wwww >229: AND TEMP[19].x, TEMP[19].xxxx, IMM[6].xxxx >230: INEG TEMP[19].x, TEMP[19].xxxx >231: USNE TEMP[19].x, TEMP[19].xxxx, IMM[1].xxxx >232: UIF TEMP[19].xxxx :0 >233: MOV TEMP[19].x, TEMP[15].wwww >234: ELSE :0 >235: MOV TEMP[19].x, TEMP[15].zzzz >236: ENDIF >237: MUL TEMP[4].x, TEMP[19].xxxx, IMM[3].xxxx >238: FSLT TEMP[20].x, IMM[2].wwww, TEMP[18].xxxx >239: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >240: INEG TEMP[20].x, TEMP[20].xxxx >241: AND TEMP[20].x, TEMP[14].xxxx, TEMP[20].xxxx >242: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >243: UIF TEMP[20].xxxx :0 >244: MOV TEMP[20].x, 
TEMP[4].xxxx >245: ELSE :0 >246: MOV TEMP[20].x, TEMP[19].xxxx >247: ENDIF >248: ABS TEMP[18].x, TEMP[18].xxxx >249: ADD TEMP[18].x, TEMP[18].xxxx, -TEMP[20].xxxx >250: FSNE TEMP[19].x, TEMP[20].xxxx, IMM[2].wwww >251: UIF TEMP[19].xxxx :0 >252: RCP TEMP[19].x, TEMP[20].xxxx >253: MUL TEMP[19].x, TEMP[18].xxxx, TEMP[19].xxxx >254: ELSE :0 >255: SSG TEMP[18].x, TEMP[18].xxxx >256: MUL TEMP[19].x, IMM[5].xxxx, TEMP[18].xxxx >257: ENDIF >258: MOV_SAT TEMP[18].x, TEMP[19].xxxx >259: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[18].xxxx >260: MOV TEMP[4].w, IMM[0].xxxx >261: DP2 TEMP[18].x, TEMP[3].yxxx, IMM[9].xyyy >262: DP2 TEMP[19].x, TEMP[11].xyyy, IMM[9].xyyy >263: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[19].xxxx, TEMP[1].xyzz >264: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[18].xxxx, TEMP[15].xyzz >265: MUL TEMP[17], TEMP[9].xxxx, IMM[10] >266: FMA TEMP[20].xyz, TEMP[15].xyzz, TEMP[17].xxxx, TEMP[10].xyzz >267: MOV TEMP[4].xyz, TEMP[20].xyzx >268: DP4 TEMP[15].x, CONST[1][6], TEMP[4] >269: DP4 TEMP[21].x, CONST[1][7], TEMP[4] >270: MOV TEMP[15].y, TEMP[21].xxxx >271: DP4 TEMP[21].x, CONST[1][9], TEMP[4] >272: FSEQ TEMP[22].xy, TEMP[21].xxxx, IMM[2].wwww >273: SSG TEMP[23].xy, TEMP[15].xyyy >274: MUL TEMP[23].xy, IMM[5].xxxx, TEMP[23].xyyy >275: RCP TEMP[21].xy, TEMP[21].xxxx >276: MUL TEMP[21].xy, TEMP[15].xyyy, TEMP[21].xyyy >277: UCMP TEMP[4].xy, TEMP[22].xyyy, TEMP[23].xyyy, TEMP[21].xyyy >278: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >279: MOV TEMP[21].xy, TEMP[4].xyyy >280: TEX TEMP[21].x, TEMP[21], SAMP[3], 2D >281: ADD TEMP[4].x, -TEMP[21].xxxx, TEMP[20].zzzz >282: FSLT TEMP[20].x, TEMP[4].xxxx, IMM[2].wwww >283: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >284: INEG TEMP[20].x, TEMP[20].xxxx >285: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >286: UIF TEMP[20].xxxx :0 >287: MOV TEMP[20].x, TEMP[17].yyyy >288: ELSE :0 >289: MOV TEMP[20].x, TEMP[17].xxxx >290: ENDIF >291: MUL TEMP[21].x, TEMP[20].xxxx, IMM[3].xxxx >292: FSLT TEMP[22].x, IMM[2].wwww, TEMP[4].xxxx >293: AND TEMP[22].x, TEMP[22].xxxx, IMM[6].xxxx >294: INEG TEMP[22].x, TEMP[22].xxxx >295: AND TEMP[22].x, TEMP[14].xxxx, TEMP[22].xxxx >296: USNE TEMP[22].x, TEMP[22].xxxx, IMM[1].xxxx >297: UIF TEMP[22].xxxx :0 >298: MOV TEMP[21].x, TEMP[21].xxxx >299: ELSE :0 >300: MOV TEMP[21].x, TEMP[20].xxxx >301: ENDIF >302: ABS TEMP[20].x, TEMP[4].xxxx >303: ADD TEMP[4].x, -TEMP[21].xxxx, TEMP[20].xxxx >304: FSNE TEMP[20].x, TEMP[21].xxxx, IMM[2].wwww >305: UIF TEMP[20].xxxx :0 >306: RCP TEMP[20].x, TEMP[21].xxxx >307: MUL TEMP[20].x, TEMP[4].xxxx, TEMP[20].xxxx >308: ELSE :0 >309: SSG TEMP[21].x, TEMP[4].xxxx >310: MUL TEMP[20].x, IMM[5].xxxx, TEMP[21].xxxx >311: ENDIF >312: MOV_SAT TEMP[20].x, TEMP[20].xxxx >313: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[20].xxxx >314: MOV TEMP[4].w, IMM[0].xxxx >315: DP2 TEMP[20].x, TEMP[3].yxxx, IMM[3].xxxx >316: DP2 TEMP[21].x, TEMP[11].xyyy, IMM[3].xxxx >317: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[21].xxxx, TEMP[1].xyzz >318: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[20].xxxx, TEMP[15].xyzz >319: FMA TEMP[22].xyz, TEMP[15].xyzz, TEMP[17].zzzz, TEMP[10].xyzz >320: MOV TEMP[4].xyz, TEMP[22].xyzx >321: DP4 TEMP[15].x, CONST[1][6], TEMP[4] >322: DP4 TEMP[23].x, CONST[1][7], TEMP[4] >323: MOV TEMP[15].y, TEMP[23].xxxx >324: DP4 TEMP[23].x, CONST[1][9], TEMP[4] >325: FSEQ TEMP[24].xy, TEMP[23].xxxx, IMM[2].wwww >326: SSG TEMP[25].xy, TEMP[15].xyyy >327: MUL TEMP[25].xy, IMM[5].xxxx, TEMP[25].xyyy >328: RCP TEMP[23].xy, TEMP[23].xxxx >329: MUL TEMP[23].xy, TEMP[15].xyyy, TEMP[23].xyyy >330: UCMP TEMP[4].xy, TEMP[24].xyyy, 
TEMP[25].xyyy, TEMP[23].xyyy >331: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >332: MOV TEMP[23].xy, TEMP[4].xyyy >333: TEX TEMP[23].x, TEMP[23], SAMP[3], 2D >334: ADD TEMP[4].x, -TEMP[23].xxxx, TEMP[22].zzzz >335: FSLT TEMP[22].x, TEMP[4].xxxx, IMM[2].wwww >336: AND TEMP[22].x, TEMP[22].xxxx, IMM[6].xxxx >337: INEG TEMP[22].x, TEMP[22].xxxx >338: USNE TEMP[22].x, TEMP[22].xxxx, IMM[1].xxxx >339: UIF TEMP[22].xxxx :0 >340: MOV TEMP[22].x, TEMP[17].wwww >341: ELSE :0 >342: MOV TEMP[22].x, TEMP[17].zzzz >343: ENDIF >344: MUL TEMP[23].x, TEMP[22].xxxx, IMM[3].xxxx >345: FSLT TEMP[24].x, IMM[2].wwww, TEMP[4].xxxx >346: AND TEMP[24].x, TEMP[24].xxxx, IMM[6].xxxx >347: INEG TEMP[24].x, TEMP[24].xxxx >348: AND TEMP[24].x, TEMP[14].xxxx, TEMP[24].xxxx >349: USNE TEMP[24].x, TEMP[24].xxxx, IMM[1].xxxx >350: UIF TEMP[24].xxxx :0 >351: MOV TEMP[23].x, TEMP[23].xxxx >352: ELSE :0 >353: MOV TEMP[23].x, TEMP[22].xxxx >354: ENDIF >355: ABS TEMP[22].x, TEMP[4].xxxx >356: ADD TEMP[4].x, -TEMP[23].xxxx, TEMP[22].xxxx >357: FSNE TEMP[22].x, TEMP[23].xxxx, IMM[2].wwww >358: UIF TEMP[22].xxxx :0 >359: RCP TEMP[22].x, TEMP[23].xxxx >360: MUL TEMP[22].x, TEMP[4].xxxx, TEMP[22].xxxx >361: ELSE :0 >362: SSG TEMP[23].x, TEMP[4].xxxx >363: MUL TEMP[22].x, IMM[5].xxxx, TEMP[23].xxxx >364: ENDIF >365: MOV_SAT TEMP[22].x, TEMP[22].xxxx >366: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[22].xxxx >367: MOV TEMP[4].w, IMM[0].xxxx >368: DP2 TEMP[22].x, TEMP[3].yxxx, IMM[9].zwww >369: DP2 TEMP[13].x, TEMP[11].xyyy, IMM[9].zwww >370: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[13].xxxx, TEMP[1].xyzz >371: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[22].xxxx, TEMP[15].xyzz >372: MUL TEMP[17], TEMP[9].xxxx, IMM[11] >373: FMA TEMP[23].xyz, TEMP[15].xyzz, TEMP[17].xxxx, TEMP[10].xyzz >374: MOV TEMP[4].xyz, TEMP[23].xyzx >375: DP4 TEMP[15].x, CONST[1][6], TEMP[4] >376: DP4 TEMP[24].x, CONST[1][7], TEMP[4] >377: MOV TEMP[15].y, TEMP[24].xxxx >378: DP4 TEMP[24].x, CONST[1][9], TEMP[4] >379: FSEQ TEMP[25].xy, TEMP[24].xxxx, IMM[2].wwww >380: SSG TEMP[26].xy, TEMP[15].xyyy >381: MUL TEMP[26].xy, IMM[5].xxxx, TEMP[26].xyyy >382: RCP TEMP[24].xy, TEMP[24].xxxx >383: MUL TEMP[24].xy, TEMP[15].xyyy, TEMP[24].xyyy >384: UCMP TEMP[4].xy, TEMP[25].xyyy, TEMP[26].xyyy, TEMP[24].xyyy >385: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >386: MOV TEMP[24].xy, TEMP[4].xyyy >387: TEX TEMP[24].x, TEMP[24], SAMP[3], 2D >388: ADD TEMP[4].x, -TEMP[24].xxxx, TEMP[23].zzzz >389: FSLT TEMP[23].x, TEMP[4].xxxx, IMM[2].wwww >390: AND TEMP[23].x, TEMP[23].xxxx, IMM[6].xxxx >391: INEG TEMP[23].x, TEMP[23].xxxx >392: USNE TEMP[23].x, TEMP[23].xxxx, IMM[1].xxxx >393: UIF TEMP[23].xxxx :0 >394: MOV TEMP[23].x, TEMP[17].yyyy >395: ELSE :0 >396: MOV TEMP[23].x, TEMP[17].xxxx >397: ENDIF >398: MUL TEMP[24].x, TEMP[23].xxxx, IMM[3].xxxx >399: FSLT TEMP[25].x, IMM[2].wwww, TEMP[4].xxxx >400: AND TEMP[25].x, TEMP[25].xxxx, IMM[6].xxxx >401: INEG TEMP[25].x, TEMP[25].xxxx >402: AND TEMP[25].x, TEMP[14].xxxx, TEMP[25].xxxx >403: USNE TEMP[25].x, TEMP[25].xxxx, IMM[1].xxxx >404: UIF TEMP[25].xxxx :0 >405: MOV TEMP[24].x, TEMP[24].xxxx >406: ELSE :0 >407: MOV TEMP[24].x, TEMP[23].xxxx >408: ENDIF >409: ABS TEMP[23].x, TEMP[4].xxxx >410: ADD TEMP[4].x, -TEMP[24].xxxx, TEMP[23].xxxx >411: FSNE TEMP[23].x, TEMP[24].xxxx, IMM[2].wwww >412: UIF TEMP[23].xxxx :0 >413: RCP TEMP[23].x, TEMP[24].xxxx >414: MUL TEMP[23].x, TEMP[4].xxxx, TEMP[23].xxxx >415: ELSE :0 >416: SSG TEMP[24].x, TEMP[4].xxxx >417: MUL TEMP[23].x, IMM[5].xxxx, TEMP[24].xxxx >418: ENDIF >419: MOV_SAT TEMP[23].x, 
TEMP[23].xxxx >420: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[23].xxxx >421: DP2 TEMP[4].x, TEMP[3].yxxx, IMM[5].wzzz >422: DP2 TEMP[23].x, TEMP[11].xyyy, IMM[5].wzzz >423: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[23].xxxx, TEMP[1].xyzz >424: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[4].xxxx, TEMP[15].xyzz >425: FMA TEMP[24].xyz, TEMP[15].xyzz, TEMP[17].zzzz, TEMP[10].xyzz >426: MOV TEMP[15].xyz, TEMP[24].xyzx >427: MOV TEMP[15].w, IMM[0].xxxx >428: DP4 TEMP[17].x, CONST[1][6], TEMP[15] >429: DP4 TEMP[25].x, CONST[1][7], TEMP[15] >430: MOV TEMP[17].y, TEMP[25].xxxx >431: DP4 TEMP[25].x, CONST[1][9], TEMP[15] >432: FSEQ TEMP[26].xy, TEMP[25].xxxx, IMM[2].wwww >433: SSG TEMP[27].xy, TEMP[17].xyyy >434: MUL TEMP[27].xy, IMM[5].xxxx, TEMP[27].xyyy >435: RCP TEMP[25].xy, TEMP[25].xxxx >436: MUL TEMP[25].xy, TEMP[17].xyyy, TEMP[25].xyyy >437: UCMP TEMP[25].xy, TEMP[26].xyyy, TEMP[27].xyyy, TEMP[25].xyyy >438: FMA TEMP[25].xy, TEMP[25].xyyy, IMM[5].yzzz, IMM[3].xxxx >439: MOV TEMP[25].xy, TEMP[25].xyyy >440: TEX TEMP[25].x, TEMP[25], SAMP[3], 2D >441: ADD TEMP[24].x, -TEMP[25].xxxx, TEMP[24].zzzz >442: FSLT TEMP[25].x, TEMP[24].xxxx, IMM[2].wwww >443: AND TEMP[25].x, TEMP[25].xxxx, IMM[6].xxxx >444: INEG TEMP[25].x, TEMP[25].xxxx >445: USNE TEMP[25].x, TEMP[25].xxxx, IMM[1].xxxx >446: UIF TEMP[25].xxxx :0 >447: MOV TEMP[25].x, TEMP[17].wwww >448: ELSE :0 >449: MOV TEMP[25].x, TEMP[17].zzzz >450: ENDIF >451: MUL TEMP[26].x, TEMP[25].xxxx, IMM[3].xxxx >452: FSLT TEMP[27].x, IMM[2].wwww, TEMP[24].xxxx >453: AND TEMP[27].x, TEMP[27].xxxx, IMM[6].xxxx >454: INEG TEMP[27].x, TEMP[27].xxxx >455: AND TEMP[27].x, TEMP[14].xxxx, TEMP[27].xxxx >456: USNE TEMP[27].x, TEMP[27].xxxx, IMM[1].xxxx >457: UIF TEMP[27].xxxx :0 >458: MOV TEMP[26].x, TEMP[26].xxxx >459: ELSE :0 >460: MOV TEMP[26].x, TEMP[25].xxxx >461: ENDIF >462: ABS TEMP[24].x, TEMP[24].xxxx >463: ADD TEMP[24].x, -TEMP[26].xxxx, TEMP[24].xxxx >464: FSNE TEMP[25].x, TEMP[26].xxxx, IMM[2].wwww >465: UIF TEMP[25].xxxx :0 >466: RCP TEMP[25].x, TEMP[26].xxxx >467: MUL TEMP[25].x, TEMP[24].xxxx, TEMP[25].xxxx >468: ELSE :0 >469: SSG TEMP[24].x, TEMP[24].xxxx >470: MUL TEMP[25].x, IMM[5].xxxx, TEMP[24].xxxx >471: ENDIF >472: MOV_SAT TEMP[24].x, TEMP[25].xxxx >473: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[24].xxxx >474: DP2 TEMP[24].x, TEMP[3].yxxx, IMM[12].xyyy >475: DP2 TEMP[3].x, TEMP[3].yxxx, IMM[12].zwww >476: DP2 TEMP[25].x, TEMP[11].xyyy, IMM[12].xyyy >477: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[25].xxxx, TEMP[1].xyzz >478: FMA TEMP[15].xyz, TEMP[6].xyzz, TEMP[24].xxxx, TEMP[15].xyzz >479: MUL TEMP[17], TEMP[9].xxxx, IMM[13] >480: FMA TEMP[26].xyz, TEMP[15].xyzz, TEMP[17].xxxx, TEMP[10].xyzz >481: MOV TEMP[15].xyz, TEMP[26].xyzx >482: MOV TEMP[15].w, IMM[0].xxxx >483: DP4 TEMP[27].x, CONST[1][6], TEMP[15] >484: DP4 TEMP[28].x, CONST[1][7], TEMP[15] >485: MOV TEMP[27].y, TEMP[28].xxxx >486: DP4 TEMP[28].x, CONST[1][9], TEMP[15] >487: FSEQ TEMP[29].xy, TEMP[28].xxxx, IMM[2].wwww >488: SSG TEMP[30].xy, TEMP[27].xyyy >489: MUL TEMP[30].xy, IMM[5].xxxx, TEMP[30].xyyy >490: RCP TEMP[28].xy, TEMP[28].xxxx >491: MUL TEMP[27].xy, TEMP[27].xyyy, TEMP[28].xyyy >492: UCMP TEMP[15].xy, TEMP[29].xyyy, TEMP[30].xyyy, TEMP[27].xyyy >493: FMA TEMP[15].xy, TEMP[15].xyyy, IMM[5].yzzz, IMM[3].xxxx >494: MOV TEMP[27].xy, TEMP[15].xyyy >495: TEX TEMP[27].x, TEMP[27], SAMP[3], 2D >496: ADD TEMP[26].x, -TEMP[27].xxxx, TEMP[26].zzzz >497: FSLT TEMP[27].x, IMM[2].wwww, TEMP[26].xxxx >498: AND TEMP[27].x, TEMP[27].xxxx, IMM[6].xxxx >499: INEG TEMP[27].x, TEMP[27].xxxx >500: AND TEMP[27].x, 
TEMP[14].xxxx, TEMP[27].xxxx >501: FSLT TEMP[28].x, TEMP[26].xxxx, IMM[2].wwww >502: AND TEMP[28].x, TEMP[28].xxxx, IMM[6].xxxx >503: INEG TEMP[28].x, TEMP[28].xxxx >504: USNE TEMP[28].x, TEMP[28].xxxx, IMM[1].xxxx >505: UIF TEMP[28].xxxx :0 >506: MOV TEMP[28].x, TEMP[17].yyyy >507: ELSE :0 >508: MOV TEMP[28].x, TEMP[17].xxxx >509: ENDIF >510: MUL TEMP[15].x, TEMP[28].xxxx, IMM[3].xxxx >511: USNE TEMP[27].x, TEMP[27].xxxx, IMM[1].xxxx >512: UIF TEMP[27].xxxx :0 >513: MOV TEMP[27].x, TEMP[15].xxxx >514: ELSE :0 >515: MOV TEMP[27].x, TEMP[28].xxxx >516: ENDIF >517: ABS TEMP[26].x, TEMP[26].xxxx >518: ADD TEMP[26].x, TEMP[26].xxxx, -TEMP[27].xxxx >519: FSNE TEMP[28].x, TEMP[27].xxxx, IMM[2].wwww >520: UIF TEMP[28].xxxx :0 >521: RCP TEMP[27].x, TEMP[27].xxxx >522: MUL TEMP[27].x, TEMP[26].xxxx, TEMP[27].xxxx >523: ELSE :0 >524: SSG TEMP[26].x, TEMP[26].xxxx >525: MUL TEMP[27].x, IMM[5].xxxx, TEMP[26].xxxx >526: ENDIF >527: MOV_SAT TEMP[26].x, TEMP[27].xxxx >528: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[26].xxxx >529: DP2 TEMP[26].x, TEMP[11].xyyy, IMM[12].zwww >530: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[26].xxxx, TEMP[1].xyzz >531: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[3].xxxx, TEMP[1].xyzz >532: FMA TEMP[27].xyz, TEMP[1].xyzz, TEMP[17].zzzz, TEMP[10].xyzz >533: MOV TEMP[15].xyz, TEMP[27].xyzx >534: MOV TEMP[15].w, IMM[0].xxxx >535: DP4 TEMP[1].x, CONST[1][6], TEMP[15] >536: DP4 TEMP[28].x, CONST[1][7], TEMP[15] >537: MOV TEMP[1].y, TEMP[28].xxxx >538: DP4 TEMP[28].x, CONST[1][9], TEMP[15] >539: FSEQ TEMP[29].xy, TEMP[28].xxxx, IMM[2].wwww >540: SSG TEMP[30].xy, TEMP[1].xyyy >541: MUL TEMP[30].xy, IMM[5].xxxx, TEMP[30].xyyy >542: RCP TEMP[28].xy, TEMP[28].xxxx >543: MUL TEMP[28].xy, TEMP[1].xyyy, TEMP[28].xyyy >544: UCMP TEMP[1].xy, TEMP[29].xyyy, TEMP[30].xyyy, TEMP[28].xyyy >545: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >546: MOV TEMP[28].xy, TEMP[1].xyyy >547: TEX TEMP[28].x, TEMP[28], SAMP[3], 2D >548: ADD TEMP[1].x, -TEMP[28].xxxx, TEMP[27].zzzz >549: FSLT TEMP[27].x, IMM[2].wwww, TEMP[1].xxxx >550: AND TEMP[27].x, TEMP[27].xxxx, IMM[6].xxxx >551: INEG TEMP[27].x, TEMP[27].xxxx >552: AND TEMP[14].x, TEMP[14].xxxx, TEMP[27].xxxx >553: FSLT TEMP[27].x, TEMP[1].xxxx, IMM[2].wwww >554: AND TEMP[27].x, TEMP[27].xxxx, IMM[6].xxxx >555: INEG TEMP[27].x, TEMP[27].xxxx >556: USNE TEMP[27].x, TEMP[27].xxxx, IMM[1].xxxx >557: UIF TEMP[27].xxxx :0 >558: MOV TEMP[27].x, TEMP[17].wwww >559: ELSE :0 >560: MOV TEMP[27].x, TEMP[17].zzzz >561: ENDIF >562: MUL TEMP[28].x, TEMP[27].xxxx, IMM[3].xxxx >563: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >564: UIF TEMP[14].xxxx :0 >565: MOV TEMP[14].x, TEMP[28].xxxx >566: ELSE :0 >567: MOV TEMP[14].x, TEMP[27].xxxx >568: ENDIF >569: ABS TEMP[27].x, TEMP[1].xxxx >570: ADD TEMP[1].x, -TEMP[14].xxxx, TEMP[27].xxxx >571: FSNE TEMP[27].x, TEMP[14].xxxx, IMM[2].wwww >572: UIF TEMP[27].xxxx :0 >573: RCP TEMP[14].x, TEMP[14].xxxx >574: MUL TEMP[14].x, TEMP[1].xxxx, TEMP[14].xxxx >575: ELSE :0 >576: SSG TEMP[27].x, TEMP[1].xxxx >577: MUL TEMP[14].x, IMM[5].xxxx, TEMP[27].xxxx >578: ENDIF >579: MOV_SAT TEMP[14].x, TEMP[14].xxxx >580: ADD TEMP[0].x, TEMP[14].xxxx, TEMP[0].xxxx >581: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww >582: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx >583: DP2 TEMP[14].x, TEMP[11].xyyy, IMM[0].wwww >584: DP2 TEMP[1].x, TEMP[11].xyyy, IMM[2].xyyy >585: MUL TEMP[7].xyz, TEMP[2].zxyy, TEMP[6].yzxx >586: FMA TEMP[7].xyz, TEMP[2].yzxx, TEMP[6].zxyy, -TEMP[7].xyzz >587: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[14].xxxx, TEMP[2].xyzz >588: FMA TEMP[8].xyz, 
TEMP[6].xyzz, TEMP[8].xxxx, TEMP[15].xyzz >589: MUL TEMP[15], TEMP[9].yyyy, IMM[3] >590: FMA TEMP[8].xyz, TEMP[8].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >591: MOV TEMP[17].xyz, TEMP[8].xyzx >592: MOV TEMP[17].w, IMM[0].xxxx >593: DP4 TEMP[11].x, CONST[1][6], TEMP[17] >594: DP4 TEMP[14].x, CONST[1][7], TEMP[17] >595: MOV TEMP[11].y, TEMP[14].xxxx >596: DP4 TEMP[14].x, CONST[1][9], TEMP[17] >597: FSEQ TEMP[27].xy, TEMP[14].xxxx, IMM[2].wwww >598: SSG TEMP[28].xy, TEMP[11].xyyy >599: MUL TEMP[28].xy, IMM[5].xxxx, TEMP[28].xyyy >600: RCP TEMP[14].xy, TEMP[14].xxxx >601: MUL TEMP[14].xy, TEMP[11].xyyy, TEMP[14].xyyy >602: UCMP TEMP[14].xy, TEMP[27].xyyy, TEMP[28].xyyy, TEMP[14].xyyy >603: FMA TEMP[14].xy, TEMP[14].xyyy, IMM[5].yzzz, IMM[3].xxxx >604: MOV TEMP[14].xy, TEMP[14].xyyy >605: TEX TEMP[14].x, TEMP[14], SAMP[3], 2D >606: ADD TEMP[8].x, -TEMP[14].xxxx, TEMP[8].zzzz >607: FSLT TEMP[14].x, TEMP[8].xxxx, IMM[2].wwww >608: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx >609: INEG TEMP[14].x, TEMP[14].xxxx >610: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >611: UIF TEMP[14].xxxx :0 >612: MOV TEMP[14].x, TEMP[15].yyyy >613: ELSE :0 >614: MOV TEMP[14].x, TEMP[15].xxxx >615: ENDIF >616: ABS TEMP[8].x, TEMP[8].xxxx >617: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[14].xxxx >618: FSNE TEMP[27].x, TEMP[14].xxxx, IMM[2].wwww >619: UIF TEMP[27].xxxx :0 >620: RCP TEMP[14].x, TEMP[14].xxxx >621: MUL TEMP[14].x, TEMP[8].xxxx, TEMP[14].xxxx >622: ELSE :0 >623: SSG TEMP[8].x, TEMP[8].xxxx >624: MUL TEMP[14].x, IMM[5].xxxx, TEMP[8].xxxx >625: ENDIF >626: MOV_SAT TEMP[8].x, TEMP[14].xxxx >627: FMA TEMP[14].xyz, TEMP[7].xyzz, TEMP[13].yyyy, TEMP[2].xyzz >628: FMA TEMP[14].xyz, TEMP[6].xyzz, TEMP[13].zzzz, TEMP[14].xyzz >629: FMA TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >630: MOV TEMP[17].xyz, TEMP[14].xyzx >631: MOV TEMP[17].w, IMM[0].xxxx >632: DP4 TEMP[11].x, CONST[1][6], TEMP[17] >633: DP4 TEMP[27].x, CONST[1][7], TEMP[17] >634: MOV TEMP[11].y, TEMP[27].xxxx >635: DP4 TEMP[17].x, CONST[1][9], TEMP[17] >636: FSEQ TEMP[27].xy, TEMP[17].xxxx, IMM[2].wwww >637: SSG TEMP[28].xy, TEMP[11].xyyy >638: MUL TEMP[28].xy, IMM[5].xxxx, TEMP[28].xyyy >639: RCP TEMP[17].xy, TEMP[17].xxxx >640: MUL TEMP[17].xy, TEMP[11].xyyy, TEMP[17].xyyy >641: UCMP TEMP[17].xy, TEMP[27].xyyy, TEMP[28].xyyy, TEMP[17].xyyy >642: FMA TEMP[17].xy, TEMP[17].xyyy, IMM[5].yzzz, IMM[3].xxxx >643: MOV TEMP[17].xy, TEMP[17].xyyy >644: TEX TEMP[17].x, TEMP[17], SAMP[3], 2D >645: ADD TEMP[14].x, -TEMP[17].xxxx, TEMP[14].zzzz >646: FSLT TEMP[17].x, TEMP[14].xxxx, IMM[2].wwww >647: AND TEMP[17].x, TEMP[17].xxxx, IMM[6].xxxx >648: INEG TEMP[17].x, TEMP[17].xxxx >649: USNE TEMP[17].x, TEMP[17].xxxx, IMM[1].xxxx >650: UIF TEMP[17].xxxx :0 >651: MOV TEMP[17].x, TEMP[15].wwww >652: ELSE :0 >653: MOV TEMP[17].x, TEMP[15].zzzz >654: ENDIF >655: ABS TEMP[14].x, TEMP[14].xxxx >656: ADD TEMP[14].x, -TEMP[17].xxxx, TEMP[14].xxxx >657: FSNE TEMP[27].x, TEMP[17].xxxx, IMM[2].wwww >658: UIF TEMP[27].xxxx :0 >659: RCP TEMP[17].x, TEMP[17].xxxx >660: MUL TEMP[17].x, TEMP[14].xxxx, TEMP[17].xxxx >661: ELSE :0 >662: SSG TEMP[14].x, TEMP[14].xxxx >663: MUL TEMP[17].x, IMM[5].xxxx, TEMP[14].xxxx >664: ENDIF >665: MOV_SAT TEMP[14].x, TEMP[17].xxxx >666: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[14].xxxx >667: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xxxx, TEMP[2].xyzz >668: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[12].xxxx, TEMP[1].xyzz >669: MUL TEMP[15], TEMP[9].yyyy, IMM[8] >670: FMA TEMP[12].xyz, TEMP[1].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >671: MOV TEMP[1].xyz, TEMP[12].xyzx 
>672: MOV TEMP[1].w, IMM[0].xxxx >673: DP4 TEMP[11].x, CONST[1][6], TEMP[1] >674: DP4 TEMP[14].x, CONST[1][7], TEMP[1] >675: MOV TEMP[11].y, TEMP[14].xxxx >676: DP4 TEMP[14].x, CONST[1][9], TEMP[1] >677: FSEQ TEMP[17].xy, TEMP[14].xxxx, IMM[2].wwww >678: SSG TEMP[27].xy, TEMP[11].xyyy >679: MUL TEMP[27].xy, IMM[5].xxxx, TEMP[27].xyyy >680: RCP TEMP[14].xy, TEMP[14].xxxx >681: MUL TEMP[14].xy, TEMP[11].xyyy, TEMP[14].xyyy >682: UCMP TEMP[1].xy, TEMP[17].xyyy, TEMP[27].xyyy, TEMP[14].xyyy >683: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >684: MOV TEMP[14].xy, TEMP[1].xyyy >685: TEX TEMP[14].x, TEMP[14], SAMP[3], 2D >686: ADD TEMP[12].x, -TEMP[14].xxxx, TEMP[12].zzzz >687: FSLT TEMP[14].x, TEMP[12].xxxx, IMM[2].wwww >688: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx >689: INEG TEMP[14].x, TEMP[14].xxxx >690: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >691: UIF TEMP[14].xxxx :0 >692: MOV TEMP[14].x, TEMP[15].yyyy >693: ELSE :0 >694: MOV TEMP[14].x, TEMP[15].xxxx >695: ENDIF >696: ABS TEMP[12].x, TEMP[12].xxxx >697: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[14].xxxx >698: FSNE TEMP[17].x, TEMP[14].xxxx, IMM[2].wwww >699: UIF TEMP[17].xxxx :0 >700: RCP TEMP[14].x, TEMP[14].xxxx >701: MUL TEMP[14].x, TEMP[12].xxxx, TEMP[14].xxxx >702: ELSE :0 >703: SSG TEMP[12].x, TEMP[12].xxxx >704: MUL TEMP[14].x, IMM[5].xxxx, TEMP[12].xxxx >705: ENDIF >706: MOV_SAT TEMP[12].x, TEMP[14].xxxx >707: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >708: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[16].xxxx, TEMP[2].xyzz >709: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[5].xxxx, TEMP[1].xyzz >710: FMA TEMP[5].xyz, TEMP[1].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >711: MOV TEMP[1].xyz, TEMP[5].xyzx >712: MOV TEMP[1].w, IMM[0].xxxx >713: DP4 TEMP[11].x, CONST[1][6], TEMP[1] >714: DP4 TEMP[12].x, CONST[1][7], TEMP[1] >715: MOV TEMP[11].y, TEMP[12].xxxx >716: DP4 TEMP[12].x, CONST[1][9], TEMP[1] >717: FSEQ TEMP[14].xy, TEMP[12].xxxx, IMM[2].wwww >718: SSG TEMP[16].xy, TEMP[11].xyyy >719: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >720: RCP TEMP[12].xy, TEMP[12].xxxx >721: MUL TEMP[12].xy, TEMP[11].xyyy, TEMP[12].xyyy >722: UCMP TEMP[12].xy, TEMP[14].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >723: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >724: MOV TEMP[12].xy, TEMP[12].xyyy >725: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >726: ADD TEMP[5].x, -TEMP[12].xxxx, TEMP[5].zzzz >727: FSLT TEMP[12].x, TEMP[5].xxxx, IMM[2].wwww >728: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >729: INEG TEMP[12].x, TEMP[12].xxxx >730: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >731: UIF TEMP[12].xxxx :0 >732: MOV TEMP[12].x, TEMP[15].wwww >733: ELSE :0 >734: MOV TEMP[12].x, TEMP[15].zzzz >735: ENDIF >736: ABS TEMP[5].x, TEMP[5].xxxx >737: ADD TEMP[5].x, -TEMP[12].xxxx, TEMP[5].xxxx >738: FSNE TEMP[14].x, TEMP[12].xxxx, IMM[2].wwww >739: UIF TEMP[14].xxxx :0 >740: RCP TEMP[12].x, TEMP[12].xxxx >741: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[12].xxxx >742: ELSE :0 >743: SSG TEMP[5].x, TEMP[5].xxxx >744: MUL TEMP[12].x, IMM[5].xxxx, TEMP[5].xxxx >745: ENDIF >746: MOV_SAT TEMP[5].x, TEMP[12].xxxx >747: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx >748: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[19].xxxx, TEMP[2].xyzz >749: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[18].xxxx, TEMP[1].xyzz >750: MUL TEMP[15], TEMP[9].yyyy, IMM[10] >751: FMA TEMP[8].xyz, TEMP[1].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >752: MOV TEMP[1].xyz, TEMP[8].xyzx >753: MOV TEMP[1].w, IMM[0].xxxx >754: DP4 TEMP[11].x, CONST[1][6], TEMP[1] >755: DP4 TEMP[12].x, CONST[1][7], TEMP[1] >756: MOV TEMP[11].y, TEMP[12].xxxx >757: 
DP4 TEMP[12].x, CONST[1][9], TEMP[1] >758: FSEQ TEMP[14].xy, TEMP[12].xxxx, IMM[2].wwww >759: SSG TEMP[16].xy, TEMP[11].xyyy >760: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >761: RCP TEMP[12].xy, TEMP[12].xxxx >762: MUL TEMP[12].xy, TEMP[11].xyyy, TEMP[12].xyyy >763: UCMP TEMP[12].xy, TEMP[14].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >764: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >765: MOV TEMP[12].xy, TEMP[12].xyyy >766: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >767: ADD TEMP[8].x, -TEMP[12].xxxx, TEMP[8].zzzz >768: FSLT TEMP[12].x, TEMP[8].xxxx, IMM[2].wwww >769: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >770: INEG TEMP[12].x, TEMP[12].xxxx >771: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >772: UIF TEMP[12].xxxx :0 >773: MOV TEMP[12].x, TEMP[15].yyyy >774: ELSE :0 >775: MOV TEMP[12].x, TEMP[15].xxxx >776: ENDIF >777: ABS TEMP[8].x, TEMP[8].xxxx >778: ADD TEMP[8].x, -TEMP[12].xxxx, TEMP[8].xxxx >779: FSNE TEMP[14].x, TEMP[12].xxxx, IMM[2].wwww >780: UIF TEMP[14].xxxx :0 >781: RCP TEMP[12].x, TEMP[12].xxxx >782: MUL TEMP[12].x, TEMP[8].xxxx, TEMP[12].xxxx >783: ELSE :0 >784: SSG TEMP[8].x, TEMP[8].xxxx >785: MUL TEMP[12].x, IMM[5].xxxx, TEMP[8].xxxx >786: ENDIF >787: MOV_SAT TEMP[8].x, TEMP[12].xxxx >788: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx >789: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[21].xxxx, TEMP[2].xyzz >790: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[20].xxxx, TEMP[1].xyzz >791: FMA TEMP[8].xyz, TEMP[1].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >792: MOV TEMP[1].xyz, TEMP[8].xyzx >793: MOV TEMP[1].w, IMM[0].xxxx >794: DP4 TEMP[11].x, CONST[1][6], TEMP[1] >795: DP4 TEMP[12].x, CONST[1][7], TEMP[1] >796: MOV TEMP[11].y, TEMP[12].xxxx >797: DP4 TEMP[12].x, CONST[1][9], TEMP[1] >798: FSEQ TEMP[14].xy, TEMP[12].xxxx, IMM[2].wwww >799: SSG TEMP[16].xy, TEMP[11].xyyy >800: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >801: RCP TEMP[12].xy, TEMP[12].xxxx >802: MUL TEMP[12].xy, TEMP[11].xyyy, TEMP[12].xyyy >803: UCMP TEMP[12].xy, TEMP[14].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >804: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >805: MOV TEMP[12].xy, TEMP[12].xyyy >806: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >807: ADD TEMP[8].x, -TEMP[12].xxxx, TEMP[8].zzzz >808: FSLT TEMP[12].x, TEMP[8].xxxx, IMM[2].wwww >809: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >810: INEG TEMP[12].x, TEMP[12].xxxx >811: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >812: UIF TEMP[12].xxxx :0 >813: MOV TEMP[12].x, TEMP[15].wwww >814: ELSE :0 >815: MOV TEMP[12].x, TEMP[15].zzzz >816: ENDIF >817: ABS TEMP[8].x, TEMP[8].xxxx >818: ADD TEMP[8].x, -TEMP[12].xxxx, TEMP[8].xxxx >819: FSNE TEMP[14].x, TEMP[12].xxxx, IMM[2].wwww >820: UIF TEMP[14].xxxx :0 >821: RCP TEMP[12].x, TEMP[12].xxxx >822: MUL TEMP[12].x, TEMP[8].xxxx, TEMP[12].xxxx >823: ELSE :0 >824: SSG TEMP[8].x, TEMP[8].xxxx >825: MUL TEMP[12].x, IMM[5].xxxx, TEMP[8].xxxx >826: ENDIF >827: MOV_SAT TEMP[8].x, TEMP[12].xxxx >828: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx >829: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[13].xxxx, TEMP[2].xyzz >830: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[22].xxxx, TEMP[1].xyzz >831: MUL TEMP[11], TEMP[9].yyyy, IMM[11] >832: MUL TEMP[13], TEMP[9].yyyy, IMM[13] >833: FMA TEMP[8].xyz, TEMP[1].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >834: MOV TEMP[1].xyz, TEMP[8].xyzx >835: MOV TEMP[1].w, IMM[0].xxxx >836: DP4 TEMP[15].x, CONST[1][6], TEMP[1] >837: DP4 TEMP[9].x, CONST[1][7], TEMP[1] >838: MOV TEMP[15].y, TEMP[9].xxxx >839: DP4 TEMP[9].x, CONST[1][9], TEMP[1] >840: FSEQ TEMP[12].xy, TEMP[9].xxxx, IMM[2].wwww >841: SSG TEMP[14].xy, TEMP[15].xyyy >842: 
MUL TEMP[14].xy, IMM[5].xxxx, TEMP[14].xyyy >843: RCP TEMP[9].xy, TEMP[9].xxxx >844: MUL TEMP[9].xy, TEMP[15].xyyy, TEMP[9].xyyy >845: UCMP TEMP[9].xy, TEMP[12].xyyy, TEMP[14].xyyy, TEMP[9].xyyy >846: FMA TEMP[9].xy, TEMP[9].xyyy, IMM[5].yzzz, IMM[3].xxxx >847: MOV TEMP[9].xy, TEMP[9].xyyy >848: TEX TEMP[9].x, TEMP[9], SAMP[3], 2D >849: ADD TEMP[8].x, -TEMP[9].xxxx, TEMP[8].zzzz >850: FSLT TEMP[9].x, TEMP[8].xxxx, IMM[2].wwww >851: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >852: INEG TEMP[9].x, TEMP[9].xxxx >853: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx >854: UIF TEMP[9].xxxx :0 >855: MOV TEMP[9].x, TEMP[11].yyyy >856: ELSE :0 >857: MOV TEMP[9].x, TEMP[11].xxxx >858: ENDIF >859: ABS TEMP[8].x, TEMP[8].xxxx >860: ADD TEMP[8].x, -TEMP[9].xxxx, TEMP[8].xxxx >861: FSNE TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >862: UIF TEMP[12].xxxx :0 >863: RCP TEMP[9].x, TEMP[9].xxxx >864: MUL TEMP[9].x, TEMP[8].xxxx, TEMP[9].xxxx >865: ELSE :0 >866: SSG TEMP[8].x, TEMP[8].xxxx >867: MUL TEMP[9].x, IMM[5].xxxx, TEMP[8].xxxx >868: ENDIF >869: MOV_SAT TEMP[8].x, TEMP[9].xxxx >870: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx >871: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[23].xxxx, TEMP[2].xyzz >872: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[4].xxxx, TEMP[1].xyzz >873: FMA TEMP[8].xyz, TEMP[1].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >874: MOV TEMP[1].xyz, TEMP[8].xyzx >875: MOV TEMP[1].w, IMM[0].xxxx >876: DP4 TEMP[4].x, CONST[1][6], TEMP[1] >877: DP4 TEMP[9].x, CONST[1][7], TEMP[1] >878: MOV TEMP[4].y, TEMP[9].xxxx >879: DP4 TEMP[9].x, CONST[1][9], TEMP[1] >880: FSEQ TEMP[12].xy, TEMP[9].xxxx, IMM[2].wwww >881: SSG TEMP[14].xy, TEMP[4].xyyy >882: MUL TEMP[14].xy, IMM[5].xxxx, TEMP[14].xyyy >883: RCP TEMP[9].xy, TEMP[9].xxxx >884: MUL TEMP[4].xy, TEMP[4].xyyy, TEMP[9].xyyy >885: UCMP TEMP[4].xy, TEMP[12].xyyy, TEMP[14].xyyy, TEMP[4].xyyy >886: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >887: MOV TEMP[4].xy, TEMP[4].xyyy >888: TEX TEMP[4].x, TEMP[4], SAMP[3], 2D >889: ADD TEMP[4].x, -TEMP[4].xxxx, TEMP[8].zzzz >890: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[2].wwww >891: AND TEMP[8].x, TEMP[8].xxxx, IMM[6].xxxx >892: INEG TEMP[8].x, TEMP[8].xxxx >893: USNE TEMP[8].x, TEMP[8].xxxx, IMM[1].xxxx >894: UIF TEMP[8].xxxx :0 >895: MOV TEMP[8].x, TEMP[11].wwww >896: ELSE :0 >897: MOV TEMP[8].x, TEMP[11].zzzz >898: ENDIF >899: ABS TEMP[4].x, TEMP[4].xxxx >900: ADD TEMP[4].x, -TEMP[8].xxxx, TEMP[4].xxxx >901: FSNE TEMP[9].x, TEMP[8].xxxx, IMM[2].wwww >902: UIF TEMP[9].xxxx :0 >903: RCP TEMP[8].x, TEMP[8].xxxx >904: MUL TEMP[8].x, TEMP[4].xxxx, TEMP[8].xxxx >905: ELSE :0 >906: SSG TEMP[4].x, TEMP[4].xxxx >907: MUL TEMP[8].x, IMM[5].xxxx, TEMP[4].xxxx >908: ENDIF >909: MOV_SAT TEMP[4].x, TEMP[8].xxxx >910: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx >911: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[25].xxxx, TEMP[2].xyzz >912: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[26].xxxx, TEMP[2].xyzz >913: FMA TEMP[2].xyz, TEMP[6].xyzz, TEMP[3].xxxx, TEMP[2].xyzz >914: FMA TEMP[1].xyz, TEMP[6].xyzz, TEMP[24].xxxx, TEMP[1].xyzz >915: FMA TEMP[5].xyz, TEMP[1].xyzz, TEMP[13].xxxx, TEMP[10].xyzz >916: MOV TEMP[1].xyz, TEMP[5].xyzx >917: FMA TEMP[6].xyz, TEMP[2].xyzz, TEMP[13].zzzz, TEMP[10].xyzz >918: MOV TEMP[2].xyz, TEMP[6].xyzx >919: MOV TEMP[1].w, IMM[0].xxxx >920: DP4 TEMP[3].x, CONST[1][6], TEMP[1] >921: DP4 TEMP[7].x, CONST[1][7], TEMP[1] >922: MOV TEMP[3].y, TEMP[7].xxxx >923: DP4 TEMP[7].x, CONST[1][9], TEMP[1] >924: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[2].wwww >925: SSG TEMP[9].xy, TEMP[3].xyyy >926: MUL TEMP[9].xy, IMM[5].xxxx, TEMP[9].xyyy >927: RCP TEMP[7].xy, 
TEMP[7].xxxx >928: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >929: UCMP TEMP[3].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[3].xyyy >930: FMA TEMP[3].xy, TEMP[3].xyyy, IMM[5].yzzz, IMM[3].xxxx >931: MOV TEMP[3].xy, TEMP[3].xyyy >932: TEX TEMP[3].x, TEMP[3], SAMP[3], 2D >933: ADD TEMP[3].x, -TEMP[3].xxxx, TEMP[5].zzzz >934: FSLT TEMP[5].x, TEMP[3].xxxx, IMM[2].wwww >935: AND TEMP[5].x, TEMP[5].xxxx, IMM[6].xxxx >936: INEG TEMP[5].x, TEMP[5].xxxx >937: USNE TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx >938: UIF TEMP[5].xxxx :0 >939: MOV TEMP[5].x, TEMP[13].yyyy >940: ELSE :0 >941: MOV TEMP[5].x, TEMP[13].xxxx >942: ENDIF >943: ABS TEMP[3].x, TEMP[3].xxxx >944: ADD TEMP[3].x, -TEMP[5].xxxx, TEMP[3].xxxx >945: FSNE TEMP[7].x, TEMP[5].xxxx, IMM[2].wwww >946: UIF TEMP[7].xxxx :0 >947: RCP TEMP[5].x, TEMP[5].xxxx >948: MUL TEMP[5].x, TEMP[3].xxxx, TEMP[5].xxxx >949: ELSE :0 >950: SSG TEMP[3].x, TEMP[3].xxxx >951: MUL TEMP[5].x, IMM[5].xxxx, TEMP[3].xxxx >952: ENDIF >953: MOV_SAT TEMP[3].x, TEMP[5].xxxx >954: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx >955: MOV TEMP[2].w, IMM[0].xxxx >956: DP4 TEMP[1].x, CONST[1][6], TEMP[2] >957: DP4 TEMP[4].x, CONST[1][7], TEMP[2] >958: MOV TEMP[1].y, TEMP[4].xxxx >959: DP4 TEMP[2].x, CONST[1][9], TEMP[2] >960: FSEQ TEMP[4].xy, TEMP[2].xxxx, IMM[2].wwww >961: SSG TEMP[5].xy, TEMP[1].xyyy >962: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >963: RCP TEMP[2].xy, TEMP[2].xxxx >964: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy >965: UCMP TEMP[1].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[1].xyyy >966: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >967: MOV TEMP[1].xy, TEMP[1].xyyy >968: TEX TEMP[1].x, TEMP[1], SAMP[3], 2D >969: ADD TEMP[1].x, -TEMP[1].xxxx, TEMP[6].zzzz >970: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[2].wwww >971: AND TEMP[2].x, TEMP[2].xxxx, IMM[6].xxxx >972: INEG TEMP[2].x, TEMP[2].xxxx >973: USNE TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx >974: UIF TEMP[2].xxxx :0 >975: MOV TEMP[2].x, TEMP[13].wwww >976: ELSE :0 >977: MOV TEMP[2].x, TEMP[13].zzzz >978: ENDIF >979: ABS TEMP[1].x, TEMP[1].xxxx >980: ADD TEMP[1].x, -TEMP[2].xxxx, TEMP[1].xxxx >981: FSNE TEMP[4].x, TEMP[2].xxxx, IMM[2].wwww >982: UIF TEMP[4].xxxx :0 >983: RCP TEMP[2].x, TEMP[2].xxxx >984: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx >985: ELSE :0 >986: SSG TEMP[1].x, TEMP[1].xxxx >987: MUL TEMP[2].x, IMM[5].xxxx, TEMP[1].xxxx >988: ENDIF >989: MOV_SAT TEMP[1].x, TEMP[2].xxxx >990: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx >991: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww >992: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx >993: MIN TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx >994: MOV TEMP[0].y, TEMP[1].xxxx >995: MOV TEMP[0].zw, IMM[14].yyxy >996: MOV OUT[0], TEMP[0] >997: END >radeonsi: Compiling shader 86 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 144) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 148) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 152) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 156) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 164) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 168) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 180) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 3 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 > %59 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 7 > %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 > %62 = extractelement <8 x i32> %58, i32 7 > %63 = extractelement <4 x i32> %61, i32 0 > %64 = and i32 %63, %62 > %65 = insertelement <4 x i32> %61, i32 %64, i32 0 > %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 > %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 11 > %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 > %71 = extractelement <8 x i32> %67, i32 7 > %72 = extractelement <4 x i32> %70, i32 0 > %73 = and i32 %72, %71 > %74 = insertelement <4 x i32> %70, i32 %73, i32 0 > %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 > %77 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %78 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %77, i64 0, i64 15 > %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 > %80 = 
extractelement <8 x i32> %76, i32 7 > %81 = extractelement <4 x i32> %79, i32 0 > %82 = and i32 %81, %80 > %83 = insertelement <4 x i32> %79, i32 %82, i32 0 > %84 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %93 = fmul float %88, %88 > %94 = fmul float %89, %89 > %95 = fadd float %94, %93 > %96 = fmul float %90, %90 > %97 = fadd float %95, %96 > %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) > %99 = fmul float %98, %88 > %100 = fmul float %98, %89 > %101 = fmul float %98, %90 > %102 = bitcast float %84 to i32 > %103 = bitcast float %85 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = call float @llvm.fma.f32(float %107, float 2.000000e+00, float -1.000000e+00) > %111 = call float @llvm.fma.f32(float %108, float 2.000000e+00, float -1.000000e+00) > %112 = call float @llvm.fma.f32(float %109, float 2.000000e+00, float -1.000000e+00) > %113 = fmul float %110, %110 > %114 = fmul float %111, %111 > %115 = fadd float %114, %113 > %116 = fmul float %112, %112 > %117 = fadd float %115, %116 > %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) > %119 = fmul float %118, %110 > %120 = fmul float %118, %111 > %121 = fmul float %118, %112 > %122 = fmul float %120, %40 > %123 = fmul float %120, %41 > %124 = fmul float %120, %42 > %125 = call float @llvm.fma.f32(float %119, float %37, float %122) > %126 = call float @llvm.fma.f32(float %119, float %38, float %123) > %127 = call float @llvm.fma.f32(float %119, float %39, float %124) > %128 = call float @llvm.fma.f32(float %121, float %43, float %125) > %129 = call float @llvm.fma.f32(float %121, float %44, float %126) > %130 = call float @llvm.fma.f32(float %121, float %45, float %127) > %131 = fmul float %128, %128 > %132 = fmul float %129, %129 > %133 = fadd float %132, %131 > %134 = fmul float %130, %130 > %135 = fadd float %133, %134 > %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) > %137 = fmul float %136, %128 > %138 = fmul float %136, %129 > %139 = fmul float %136, %130 > %140 = fmul float %99, %137 > %141 = fmul float %100, %138 > %142 = fadd float %141, %140 > %143 = fmul float %101, %139 > %144 = fadd float %142, %143 > %145 = fadd float %144, %144 > %146 = fsub float -0.000000e+00, %145 > %147 = call float @llvm.fma.f32(float %137, float %146, float %99) > %148 = fsub float -0.000000e+00, %145 > %149 = call float @llvm.fma.f32(float %138, float %148, float %100) > %150 = fsub float -0.000000e+00, %145 > %151 = call float @llvm.fma.f32(float %139, float %150, float %101) > %152 = call float @llvm.fma.f32(float %147, float 2.000000e+00, 
float %137) > %153 = call float @llvm.fma.f32(float %149, float 2.000000e+00, float %138) > %154 = call float @llvm.fma.f32(float %151, float 2.000000e+00, float %139) > %155 = fmul float %152, %152 > %156 = fmul float %153, %153 > %157 = fadd float %156, %155 > %158 = fmul float %154, %154 > %159 = fadd float %157, %158 > %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) > %161 = fmul float %160, %152 > %162 = fmul float %160, %153 > %163 = fmul float %160, %154 > %164 = bitcast float %86 to i32 > %165 = bitcast float %87 to i32 > %166 = insertelement <2 x i32> undef, i32 %164, i32 0 > %167 = insertelement <2 x i32> %166, i32 %165, i32 1 > %168 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %167, <8 x i32> %58, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %169 = extractelement <4 x float> %168, i32 0 > %170 = extractelement <4 x float> %168, i32 1 > %171 = call float @llvm.fma.f32(float %169, float 2.000000e+00, float -1.000000e+00) > %172 = call float @llvm.fma.f32(float %170, float 2.000000e+00, float -1.000000e+00) > %173 = fmul float %171, %171 > %174 = fmul float %172, %172 > %175 = fadd float %173, %174 > %176 = call float @llvm.AMDGPU.rsq.clamped.f32(float %175) > %177 = fmul float %176, %171 > %178 = fmul float %176, %172 > %179 = fmul float %177, 0xBFB99999A0000000 > %180 = fmul float %178, 0x3FB99999A0000000 > %181 = fadd float %179, %180 > %182 = fmul float %177, 0x3FD147AE20000000 > %183 = fmul float %178, 0x3F847AE140000000 > %184 = fadd float %182, %183 > %185 = fmul float %138, %163 > %186 = fmul float %139, %161 > %187 = fmul float %137, %162 > %188 = fsub float -0.000000e+00, %185 > %189 = call float @llvm.fma.f32(float %162, float %139, float %188) > %190 = fsub float -0.000000e+00, %186 > %191 = call float @llvm.fma.f32(float %163, float %137, float %190) > %192 = fsub float -0.000000e+00, %187 > %193 = call float @llvm.fma.f32(float %161, float %138, float %192) > %194 = fmul float %189, %189 > %195 = fmul float %191, %191 > %196 = fadd float %195, %194 > %197 = fmul float %193, %193 > %198 = fadd float %196, %197 > %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) > %200 = fmul float %199, %189 > %201 = fmul float %199, %191 > %202 = fmul float %199, %193 > %203 = fmul float %162, %202 > %204 = fmul float %163, %200 > %205 = fmul float %161, %201 > %206 = fsub float -0.000000e+00, %203 > %207 = call float @llvm.fma.f32(float %201, float %163, float %206) > %208 = fsub float -0.000000e+00, %204 > %209 = call float @llvm.fma.f32(float %202, float %161, float %208) > %210 = fsub float -0.000000e+00, %205 > %211 = call float @llvm.fma.f32(float %200, float %162, float %210) > %212 = call float @llvm.fma.f32(float %207, float %181, float %161) > %213 = call float @llvm.fma.f32(float %209, float %181, float %162) > %214 = call float @llvm.fma.f32(float %211, float %181, float %163) > %215 = fmul float %178, 0x3FB99999A0000000 > %216 = fmul float %177, 0x3FB99999A0000000 > %217 = fadd float %215, %216 > %218 = call float @llvm.fma.f32(float %200, float %217, float %212) > %219 = call float @llvm.fma.f32(float %201, float %217, float %213) > %220 = call float @llvm.fma.f32(float %202, float %217, float %214) > %221 = bitcast float %84 to i32 > %222 = bitcast float %85 to i32 > %223 = insertelement <2 x i32> undef, i32 %221, i32 0 > %224 = insertelement <2 x i32> %223, i32 %222, i32 1 > %225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %224, <8 x i32> %67, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0) > %226 = extractelement <4 x float> %225, i32 0 > %227 = fmul float %226, 0x3F70624DE0000000 > %228 = fmul float %226, 0x3F847AE140000000 > %229 = call float @llvm.maxnum.f32(float %227, float 1.000000e+00) > %230 = call float @llvm.maxnum.f32(float %228, float 1.000000e+00) > %231 = fmul float %229, 5.000000e-01 > %232 = fmul float %229, 2.500000e-01 > %233 = fmul float %229, 0x3FF4CCCCC0000000 > %234 = fmul float %229, 0x3FE4CCCCC0000000 > %235 = fmul float %226, %46 > %236 = fadd float %235, %47 > %237 = fmul float %236, %91 > %238 = fmul float %236, %92 > %239 = call float @llvm.fma.f32(float %218, float %231, float %237) > %240 = call float @llvm.fma.f32(float %219, float %231, float %238) > %241 = call float @llvm.fma.f32(float %220, float %231, float %226) > %242 = fmul float %25, %239 > %243 = fmul float %26, %240 > %244 = fadd float %242, %243 > %245 = fmul float %27, %241 > %246 = fadd float %244, %245 > %247 = fadd float %246, %28 > %248 = fmul float %29, %239 > %249 = fmul float %30, %240 > %250 = fadd float %248, %249 > %251 = fmul float %31, %241 > %252 = fadd float %250, %251 > %253 = fadd float %252, %32 > %254 = fmul float %33, %239 > %255 = fmul float %34, %240 > %256 = fadd float %254, %255 > %257 = fmul float %35, %241 > %258 = fadd float %256, %257 > %259 = fadd float %258, %36 > %260 = fcmp oeq float %259, 0.000000e+00 > %261 = fcmp oeq float %259, 0.000000e+00 > %262 = fcmp ogt float %247, 0.000000e+00 > %263 = select i1 %262, float 1.000000e+00, float %247 > %264 = fcmp oge float %263, 0.000000e+00 > %265 = fcmp ogt float %253, 0.000000e+00 > %266 = select i1 %265, float 1.000000e+00, float %253 > %267 = fcmp oge float %266, 0.000000e+00 > %.op = fmul float %263, 0x4600000000000000 > %268 = select i1 %264, float %.op, float 0xC600000000000000 > %.op290 = fmul float %266, 0x4600000000000000 > %269 = select i1 %267, float %.op290, float 0xC600000000000000 > %270 = fdiv float 1.000000e+00, %259 > %271 = fmul float %247, %270 > %272 = fmul float %253, %270 > %273 = select i1 %260, float %268, float %271 > %274 = select i1 %261, float %269, float %272 > %275 = call float @llvm.fma.f32(float %273, float 5.000000e-01, float 5.000000e-01) > %276 = call float @llvm.fma.f32(float %274, float -5.000000e-01, float 5.000000e-01) > %277 = bitcast float %275 to i32 > %278 = bitcast float %276 to i32 > %279 = insertelement <2 x i32> undef, i32 %277, i32 0 > %280 = insertelement <2 x i32> %279, i32 %278, i32 1 > %281 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %280, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %282 = extractelement <4 x float> %281, i32 0 > %283 = fsub float %241, %282 > %284 = fcmp olt float %283, 0.000000e+00 > %. = select i1 %284, float %232, float %231 > %285 = fmul float %., 5.000000e-01 > %286 = fcmp ogt float %163, 0.000000e+00 > %287 = fcmp ogt float %283, 0.000000e+00 > %288 = and i1 %286, %287 > %temp52.0 = select i1 %288, float %285, float %. 
> %289 = call float @llvm.fabs.f32(float %283) > %290 = fsub float %289, %temp52.0 > %291 = fcmp une float %temp52.0, 0.000000e+00 > br i1 %291, label %IF128, label %ELSE129 > >IF128: ; preds = %main_body > %292 = fdiv float 1.000000e+00, %temp52.0 > %293 = fmul float %290, %292 > br label %ENDIF127 > >ELSE129: ; preds = %main_body > %294 = fcmp ogt float %290, 0.000000e+00 > %295 = select i1 %294, float 1.000000e+00, float %290 > %296 = fcmp oge float %295, 0.000000e+00 > %.op291 = fmul float %295, 0x4600000000000000 > %297 = select i1 %296, float %.op291, float 0xC600000000000000 > br label %ENDIF127 > >ENDIF127: ; preds = %ELSE129, %IF128 > %temp48.1 = phi float [ %293, %IF128 ], [ %297, %ELSE129 ] > %298 = call float @llvm.AMDGPU.clamp.(float %temp48.1, float 0.000000e+00, float 1.000000e+00) > %299 = fmul float %178, 0x3FD3D70A40000000 > %300 = fmul float %177, 0xBFD3D70A40000000 > %301 = fmul float %178, 0x3FD3D70A40000000 > %302 = call float @llvm.fma.f32(float %207, float %300, float %161) > %303 = call float @llvm.fma.f32(float %209, float %300, float %162) > %304 = call float @llvm.fma.f32(float %211, float %300, float %163) > %305 = call float @llvm.fma.f32(float %200, float %299, float %302) > %306 = call float @llvm.fma.f32(float %201, float %299, float %303) > %307 = call float @llvm.fma.f32(float %202, float %299, float %304) > %308 = call float @llvm.fma.f32(float %305, float %233, float %237) > %309 = call float @llvm.fma.f32(float %306, float %233, float %238) > %310 = call float @llvm.fma.f32(float %307, float %233, float %226) > %311 = fmul float %25, %308 > %312 = fmul float %26, %309 > %313 = fadd float %311, %312 > %314 = fmul float %27, %310 > %315 = fadd float %313, %314 > %316 = fadd float %315, %28 > %317 = fmul float %29, %308 > %318 = fmul float %30, %309 > %319 = fadd float %317, %318 > %320 = fmul float %31, %310 > %321 = fadd float %319, %320 > %322 = fadd float %321, %32 > %323 = fmul float %33, %308 > %324 = fmul float %34, %309 > %325 = fadd float %323, %324 > %326 = fmul float %35, %310 > %327 = fadd float %325, %326 > %328 = fadd float %327, %36 > %329 = fcmp oeq float %328, 0.000000e+00 > %330 = fcmp oeq float %328, 0.000000e+00 > %331 = fcmp ogt float %316, 0.000000e+00 > %332 = select i1 %331, float 1.000000e+00, float %316 > %333 = fcmp oge float %332, 0.000000e+00 > %334 = fcmp ogt float %322, 0.000000e+00 > %335 = select i1 %334, float 1.000000e+00, float %322 > %336 = fcmp oge float %335, 0.000000e+00 > %.op292 = fmul float %332, 0x4600000000000000 > %337 = select i1 %333, float %.op292, float 0xC600000000000000 > %.op293 = fmul float %335, 0x4600000000000000 > %338 = select i1 %336, float %.op293, float 0xC600000000000000 > %339 = fdiv float 1.000000e+00, %328 > %340 = fmul float %316, %339 > %341 = fmul float %322, %339 > %342 = select i1 %329, float %337, float %340 > %343 = select i1 %330, float %338, float %341 > %344 = call float @llvm.fma.f32(float %342, float 5.000000e-01, float 5.000000e-01) > %345 = call float @llvm.fma.f32(float %343, float -5.000000e-01, float 5.000000e-01) > %346 = bitcast float %344 to i32 > %347 = bitcast float %345 to i32 > %348 = insertelement <2 x i32> undef, i32 %346, i32 0 > %349 = insertelement <2 x i32> %348, i32 %347, i32 1 > %350 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %349, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %351 = extractelement <4 x float> %350, i32 0 > %352 = fsub float %310, %351 > %353 = fcmp olt float %352, 0.000000e+00 > %.271 = 
select i1 %353, float %234, float %233 > %354 = fmul float %.271, 5.000000e-01 > %355 = fcmp ogt float %352, 0.000000e+00 > %356 = and i1 %286, %355 > %temp72.0 = select i1 %356, float %354, float %.271 > %357 = call float @llvm.fabs.f32(float %352) > %358 = fsub float %357, %temp72.0 > %359 = fcmp une float %temp72.0, 0.000000e+00 > br i1 %359, label %IF137, label %ELSE138 > >IF137: ; preds = %ENDIF127 > %360 = fdiv float 1.000000e+00, %temp72.0 > %361 = fmul float %358, %360 > br label %ENDIF136 > >ELSE138: ; preds = %ENDIF127 > %362 = fcmp ogt float %358, 0.000000e+00 > %363 = select i1 %362, float 1.000000e+00, float %358 > %364 = fcmp oge float %363, 0.000000e+00 > %.op294 = fmul float %363, 0x4600000000000000 > %365 = select i1 %364, float %.op294, float 0xC600000000000000 > br label %ENDIF136 > >ENDIF136: ; preds = %ELSE138, %IF137 > %temp68.1 = phi float [ %361, %IF137 ], [ %365, %ELSE138 ] > %366 = call float @llvm.AMDGPU.clamp.(float %temp68.1, float 0.000000e+00, float 1.000000e+00) > %367 = fadd float %366, %298 > %368 = fmul float %178, 0xBFD147AE20000000 > %369 = fmul float %177, 0x3F847AE140000000 > %370 = fadd float %368, %369 > %371 = call float @llvm.fma.f32(float %207, float %184, float %161) > %372 = call float @llvm.fma.f32(float %209, float %184, float %162) > %373 = call float @llvm.fma.f32(float %211, float %184, float %163) > %374 = call float @llvm.fma.f32(float %200, float %370, float %371) > %375 = call float @llvm.fma.f32(float %201, float %370, float %372) > %376 = call float @llvm.fma.f32(float %202, float %370, float %373) > %377 = fmul float %229, 0x4002666660000000 > %378 = fmul float %229, 0x3FF2666660000000 > %379 = fmul float %229, 0x40099999A0000000 > %380 = fmul float %229, 0x3FF99999A0000000 > %381 = call float @llvm.fma.f32(float %374, float %377, float %237) > %382 = call float @llvm.fma.f32(float %375, float %377, float %238) > %383 = call float @llvm.fma.f32(float %376, float %377, float %226) > %384 = fmul float %25, %381 > %385 = fmul float %26, %382 > %386 = fadd float %384, %385 > %387 = fmul float %27, %383 > %388 = fadd float %386, %387 > %389 = fadd float %388, %28 > %390 = fmul float %29, %381 > %391 = fmul float %30, %382 > %392 = fadd float %390, %391 > %393 = fmul float %31, %383 > %394 = fadd float %392, %393 > %395 = fadd float %394, %32 > %396 = fmul float %33, %381 > %397 = fmul float %34, %382 > %398 = fadd float %396, %397 > %399 = fmul float %35, %383 > %400 = fadd float %398, %399 > %401 = fadd float %400, %36 > %402 = fcmp oeq float %401, 0.000000e+00 > %403 = fcmp oeq float %401, 0.000000e+00 > %404 = fcmp ogt float %389, 0.000000e+00 > %405 = select i1 %404, float 1.000000e+00, float %389 > %406 = fcmp oge float %405, 0.000000e+00 > %407 = fcmp ogt float %395, 0.000000e+00 > %408 = select i1 %407, float 1.000000e+00, float %395 > %409 = fcmp oge float %408, 0.000000e+00 > %.op295 = fmul float %405, 0x4600000000000000 > %410 = select i1 %406, float %.op295, float 0xC600000000000000 > %.op296 = fmul float %408, 0x4600000000000000 > %411 = select i1 %409, float %.op296, float 0xC600000000000000 > %412 = fdiv float 1.000000e+00, %401 > %413 = fmul float %389, %412 > %414 = fmul float %395, %412 > %415 = select i1 %402, float %410, float %413 > %416 = select i1 %403, float %411, float %414 > %417 = call float @llvm.fma.f32(float %415, float 5.000000e-01, float 5.000000e-01) > %418 = call float @llvm.fma.f32(float %416, float -5.000000e-01, float 5.000000e-01) > %419 = bitcast float %417 to i32 > %420 = bitcast float %418 to i32 > 
%421 = insertelement <2 x i32> undef, i32 %419, i32 0 > %422 = insertelement <2 x i32> %421, i32 %420, i32 1 > %423 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %422, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %424 = extractelement <4 x float> %423, i32 0 > %425 = fsub float %383, %424 > %426 = fcmp olt float %425, 0.000000e+00 > %.272 = select i1 %426, float %378, float %377 > %427 = fmul float %.272, 5.000000e-01 > %428 = fcmp ogt float %425, 0.000000e+00 > %429 = and i1 %286, %428 > %temp68.2 = select i1 %429, float %427, float %.272 > %430 = call float @llvm.fabs.f32(float %425) > %431 = fsub float %430, %temp68.2 > %432 = fcmp une float %temp68.2, 0.000000e+00 > br i1 %432, label %IF146, label %ELSE147 > >IF146: ; preds = %ENDIF136 > %433 = fdiv float 1.000000e+00, %temp68.2 > %434 = fmul float %431, %433 > br label %ENDIF145 > >ELSE147: ; preds = %ENDIF136 > %435 = fcmp ogt float %431, 0.000000e+00 > %436 = select i1 %435, float 1.000000e+00, float %431 > %437 = fcmp oge float %436, 0.000000e+00 > %.op297 = fmul float %436, 0x4600000000000000 > %438 = select i1 %437, float %.op297, float 0xC600000000000000 > br label %ENDIF145 > >ENDIF145: ; preds = %ELSE147, %IF146 > %temp64.1 = phi float [ %434, %IF146 ], [ %438, %ELSE147 ] > %439 = call float @llvm.AMDGPU.clamp.(float %temp64.1, float 0.000000e+00, float 1.000000e+00) > %440 = fadd float %439, %367 > %441 = fmul float %178, 0xBF847AE140000000 > %442 = fmul float %177, 0xBFD3D70A40000000 > %443 = fadd float %441, %442 > %444 = fmul float %177, 0x3F847AE140000000 > %445 = fmul float %178, 0xBFD3D70A40000000 > %446 = fadd float %444, %445 > %447 = call float @llvm.fma.f32(float %207, float %446, float %161) > %448 = call float @llvm.fma.f32(float %209, float %446, float %162) > %449 = call float @llvm.fma.f32(float %211, float %446, float %163) > %450 = call float @llvm.fma.f32(float %200, float %443, float %447) > %451 = call float @llvm.fma.f32(float %201, float %443, float %448) > %452 = call float @llvm.fma.f32(float %202, float %443, float %449) > %453 = call float @llvm.fma.f32(float %450, float %379, float %237) > %454 = call float @llvm.fma.f32(float %451, float %379, float %238) > %455 = call float @llvm.fma.f32(float %452, float %379, float %226) > %456 = fmul float %25, %453 > %457 = fmul float %26, %454 > %458 = fadd float %456, %457 > %459 = fmul float %27, %455 > %460 = fadd float %458, %459 > %461 = fadd float %460, %28 > %462 = fmul float %29, %453 > %463 = fmul float %30, %454 > %464 = fadd float %462, %463 > %465 = fmul float %31, %455 > %466 = fadd float %464, %465 > %467 = fadd float %466, %32 > %468 = fmul float %33, %453 > %469 = fmul float %34, %454 > %470 = fadd float %468, %469 > %471 = fmul float %35, %455 > %472 = fadd float %470, %471 > %473 = fadd float %472, %36 > %474 = fcmp oeq float %473, 0.000000e+00 > %475 = fcmp oeq float %473, 0.000000e+00 > %476 = fcmp ogt float %461, 0.000000e+00 > %477 = select i1 %476, float 1.000000e+00, float %461 > %478 = fcmp oge float %477, 0.000000e+00 > %479 = fcmp ogt float %467, 0.000000e+00 > %480 = select i1 %479, float 1.000000e+00, float %467 > %481 = fcmp oge float %480, 0.000000e+00 > %.op298 = fmul float %477, 0x4600000000000000 > %482 = select i1 %478, float %.op298, float 0xC600000000000000 > %.op299 = fmul float %480, 0x4600000000000000 > %483 = select i1 %481, float %.op299, float 0xC600000000000000 > %484 = fdiv float 1.000000e+00, %473 > %485 = fmul float %461, %484 > %486 = fmul float %467, %484 > 
%487 = select i1 %474, float %482, float %485 > %488 = select i1 %475, float %483, float %486 > %489 = call float @llvm.fma.f32(float %487, float 5.000000e-01, float 5.000000e-01) > %490 = call float @llvm.fma.f32(float %488, float -5.000000e-01, float 5.000000e-01) > %491 = bitcast float %489 to i32 > %492 = bitcast float %490 to i32 > %493 = insertelement <2 x i32> undef, i32 %491, i32 0 > %494 = insertelement <2 x i32> %493, i32 %492, i32 1 > %495 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %494, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %496 = extractelement <4 x float> %495, i32 0 > %497 = fsub float %455, %496 > %498 = fcmp olt float %497, 0.000000e+00 > %.273 = select i1 %498, float %380, float %379 > %499 = fmul float %.273, 5.000000e-01 > %500 = fcmp ogt float %497, 0.000000e+00 > %501 = and i1 %286, %500 > %temp80.0 = select i1 %501, float %499, float %.273 > %502 = call float @llvm.fabs.f32(float %497) > %503 = fsub float %502, %temp80.0 > %504 = fcmp une float %temp80.0, 0.000000e+00 > br i1 %504, label %IF155, label %ELSE156 > >IF155: ; preds = %ENDIF145 > %505 = fdiv float 1.000000e+00, %temp80.0 > %506 = fmul float %503, %505 > br label %ENDIF154 > >ELSE156: ; preds = %ENDIF145 > %507 = fcmp ogt float %503, 0.000000e+00 > %508 = select i1 %507, float 1.000000e+00, float %503 > %509 = fcmp oge float %508, 0.000000e+00 > %.op300 = fmul float %508, 0x4600000000000000 > %510 = select i1 %509, float %.op300, float 0xC600000000000000 > br label %ENDIF154 > >ENDIF154: ; preds = %ELSE156, %IF155 > %temp76.1 = phi float [ %506, %IF155 ], [ %510, %ELSE156 ] > %511 = call float @llvm.AMDGPU.clamp.(float %temp76.1, float 0.000000e+00, float 1.000000e+00) > %512 = fadd float %440, %511 > %513 = fmul float %178, 0x3FA99999A0000000 > %514 = fmul float %177, 0xBFD851EB80000000 > %515 = fadd float %513, %514 > %516 = fmul float %177, 0xBFA99999A0000000 > %517 = fmul float %178, 0xBFD851EB80000000 > %518 = fadd float %516, %517 > %519 = call float @llvm.fma.f32(float %207, float %518, float %161) > %520 = call float @llvm.fma.f32(float %209, float %518, float %162) > %521 = call float @llvm.fma.f32(float %211, float %518, float %163) > %522 = call float @llvm.fma.f32(float %200, float %515, float %519) > %523 = call float @llvm.fma.f32(float %201, float %515, float %520) > %524 = call float @llvm.fma.f32(float %202, float %515, float %521) > %525 = fmul float %229, 0x4010666660000000 > %526 = fmul float %229, 0x4000666660000000 > %527 = fmul float %229, 0x4015333340000000 > %528 = fmul float %229, 0x4005333340000000 > %529 = call float @llvm.fma.f32(float %522, float %525, float %237) > %530 = call float @llvm.fma.f32(float %523, float %525, float %238) > %531 = call float @llvm.fma.f32(float %524, float %525, float %226) > %532 = fmul float %25, %529 > %533 = fmul float %26, %530 > %534 = fadd float %532, %533 > %535 = fmul float %27, %531 > %536 = fadd float %534, %535 > %537 = fadd float %536, %28 > %538 = fmul float %29, %529 > %539 = fmul float %30, %530 > %540 = fadd float %538, %539 > %541 = fmul float %31, %531 > %542 = fadd float %540, %541 > %543 = fadd float %542, %32 > %544 = fmul float %33, %529 > %545 = fmul float %34, %530 > %546 = fadd float %544, %545 > %547 = fmul float %35, %531 > %548 = fadd float %546, %547 > %549 = fadd float %548, %36 > %550 = fcmp oeq float %549, 0.000000e+00 > %551 = fcmp oeq float %549, 0.000000e+00 > %552 = fcmp ogt float %537, 0.000000e+00 > %553 = select i1 %552, float 1.000000e+00, float 
%537 > %554 = fcmp oge float %553, 0.000000e+00 > %555 = fcmp ogt float %543, 0.000000e+00 > %556 = select i1 %555, float 1.000000e+00, float %543 > %557 = fcmp oge float %556, 0.000000e+00 > %.op301 = fmul float %553, 0x4600000000000000 > %558 = select i1 %554, float %.op301, float 0xC600000000000000 > %.op302 = fmul float %556, 0x4600000000000000 > %559 = select i1 %557, float %.op302, float 0xC600000000000000 > %560 = fdiv float 1.000000e+00, %549 > %561 = fmul float %537, %560 > %562 = fmul float %543, %560 > %563 = select i1 %550, float %558, float %561 > %564 = select i1 %551, float %559, float %562 > %565 = call float @llvm.fma.f32(float %563, float 5.000000e-01, float 5.000000e-01) > %566 = call float @llvm.fma.f32(float %564, float -5.000000e-01, float 5.000000e-01) > %567 = bitcast float %565 to i32 > %568 = bitcast float %566 to i32 > %569 = insertelement <2 x i32> undef, i32 %567, i32 0 > %570 = insertelement <2 x i32> %569, i32 %568, i32 1 > %571 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %570, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %572 = extractelement <4 x float> %571, i32 0 > %573 = fsub float %531, %572 > %574 = fcmp olt float %573, 0.000000e+00 > %.274 = select i1 %574, float %526, float %525 > %575 = fmul float %.274, 5.000000e-01 > %576 = fcmp ogt float %573, 0.000000e+00 > %577 = and i1 %286, %576 > %temp84.0 = select i1 %577, float %575, float %.274 > %578 = call float @llvm.fabs.f32(float %573) > %579 = fsub float %578, %temp84.0 > %580 = fcmp une float %temp84.0, 0.000000e+00 > br i1 %580, label %IF164, label %ELSE165 > >IF164: ; preds = %ENDIF154 > %581 = fdiv float 1.000000e+00, %temp84.0 > %582 = fmul float %579, %581 > br label %ENDIF163 > >ELSE165: ; preds = %ENDIF154 > %583 = fcmp ogt float %579, 0.000000e+00 > %584 = select i1 %583, float 1.000000e+00, float %579 > %585 = fcmp oge float %584, 0.000000e+00 > %.op303 = fmul float %584, 0x4600000000000000 > %586 = select i1 %585, float %.op303, float 0xC600000000000000 > br label %ENDIF163 > >ENDIF163: ; preds = %ELSE165, %IF164 > %temp80.2 = phi float [ %582, %IF164 ], [ %586, %ELSE165 ] > %587 = call float @llvm.AMDGPU.clamp.(float %temp80.2, float 0.000000e+00, float 1.000000e+00) > %588 = fadd float %512, %587 > %589 = fmul float %178, 5.000000e-01 > %590 = fmul float %177, 5.000000e-01 > %591 = fadd float %589, %590 > %592 = fmul float %177, -5.000000e-01 > %593 = fmul float %178, 5.000000e-01 > %594 = fadd float %592, %593 > %595 = call float @llvm.fma.f32(float %207, float %594, float %161) > %596 = call float @llvm.fma.f32(float %209, float %594, float %162) > %597 = call float @llvm.fma.f32(float %211, float %594, float %163) > %598 = call float @llvm.fma.f32(float %200, float %591, float %595) > %599 = call float @llvm.fma.f32(float %201, float %591, float %596) > %600 = call float @llvm.fma.f32(float %202, float %591, float %597) > %601 = call float @llvm.fma.f32(float %598, float %527, float %237) > %602 = call float @llvm.fma.f32(float %599, float %527, float %238) > %603 = call float @llvm.fma.f32(float %600, float %527, float %226) > %604 = fmul float %25, %601 > %605 = fmul float %26, %602 > %606 = fadd float %604, %605 > %607 = fmul float %27, %603 > %608 = fadd float %606, %607 > %609 = fadd float %608, %28 > %610 = fmul float %29, %601 > %611 = fmul float %30, %602 > %612 = fadd float %610, %611 > %613 = fmul float %31, %603 > %614 = fadd float %612, %613 > %615 = fadd float %614, %32 > %616 = fmul float %33, %601 > %617 = fmul 
float %34, %602 > %618 = fadd float %616, %617 > %619 = fmul float %35, %603 > %620 = fadd float %618, %619 > %621 = fadd float %620, %36 > %622 = fcmp oeq float %621, 0.000000e+00 > %623 = fcmp oeq float %621, 0.000000e+00 > %624 = fcmp ogt float %609, 0.000000e+00 > %625 = select i1 %624, float 1.000000e+00, float %609 > %626 = fcmp oge float %625, 0.000000e+00 > %627 = fcmp ogt float %615, 0.000000e+00 > %628 = select i1 %627, float 1.000000e+00, float %615 > %629 = fcmp oge float %628, 0.000000e+00 > %.op304 = fmul float %625, 0x4600000000000000 > %630 = select i1 %626, float %.op304, float 0xC600000000000000 > %.op305 = fmul float %628, 0x4600000000000000 > %631 = select i1 %629, float %.op305, float 0xC600000000000000 > %632 = fdiv float 1.000000e+00, %621 > %633 = fmul float %609, %632 > %634 = fmul float %615, %632 > %635 = select i1 %622, float %630, float %633 > %636 = select i1 %623, float %631, float %634 > %637 = call float @llvm.fma.f32(float %635, float 5.000000e-01, float 5.000000e-01) > %638 = call float @llvm.fma.f32(float %636, float -5.000000e-01, float 5.000000e-01) > %639 = bitcast float %637 to i32 > %640 = bitcast float %638 to i32 > %641 = insertelement <2 x i32> undef, i32 %639, i32 0 > %642 = insertelement <2 x i32> %641, i32 %640, i32 1 > %643 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %642, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %644 = extractelement <4 x float> %643, i32 0 > %645 = fsub float %603, %644 > %646 = fcmp olt float %645, 0.000000e+00 > %.275 = select i1 %646, float %528, float %527 > %647 = fmul float %.275, 5.000000e-01 > %648 = fcmp ogt float %645, 0.000000e+00 > %649 = and i1 %286, %648 > %temp92.0 = select i1 %649, float %647, float %.275 > %650 = call float @llvm.fabs.f32(float %645) > %651 = fsub float %650, %temp92.0 > %652 = fcmp une float %temp92.0, 0.000000e+00 > br i1 %652, label %IF173, label %ELSE174 > >IF173: ; preds = %ENDIF163 > %653 = fdiv float 1.000000e+00, %temp92.0 > %654 = fmul float %651, %653 > br label %ENDIF172 > >ELSE174: ; preds = %ENDIF163 > %655 = fcmp ogt float %651, 0.000000e+00 > %656 = select i1 %655, float 1.000000e+00, float %651 > %657 = fcmp oge float %656, 0.000000e+00 > %.op306 = fmul float %656, 0x4600000000000000 > %658 = select i1 %657, float %.op306, float 0xC600000000000000 > br label %ENDIF172 > >ENDIF172: ; preds = %ELSE174, %IF173 > %temp88.1 = phi float [ %654, %IF173 ], [ %658, %ELSE174 ] > %659 = call float @llvm.AMDGPU.clamp.(float %temp88.1, float 0.000000e+00, float 1.000000e+00) > %660 = fadd float %588, %659 > %661 = fmul float %178, 0xBFD99999A0000000 > %662 = fmul float %177, 0x3FD6666660000000 > %663 = fadd float %661, %662 > %664 = fmul float %177, 0x3FD99999A0000000 > %665 = fmul float %178, 0x3FD6666660000000 > %666 = fadd float %664, %665 > %667 = call float @llvm.fma.f32(float %207, float %666, float %161) > %668 = call float @llvm.fma.f32(float %209, float %666, float %162) > %669 = call float @llvm.fma.f32(float %211, float %666, float %163) > %670 = call float @llvm.fma.f32(float %200, float %663, float %667) > %671 = call float @llvm.fma.f32(float %201, float %663, float %668) > %672 = call float @llvm.fma.f32(float %202, float %663, float %669) > %673 = fmul float %229, 0x4018666660000000 > %674 = fmul float %229, 0x4008666660000000 > %675 = fmul float %229, 0x401ECCCCC0000000 > %676 = fmul float %229, 0x400ECCCCC0000000 > %677 = call float @llvm.fma.f32(float %670, float %673, float %237) > %678 = call float 
@llvm.fma.f32(float %671, float %673, float %238) > %679 = call float @llvm.fma.f32(float %672, float %673, float %226) > %680 = fmul float %25, %677 > %681 = fmul float %26, %678 > %682 = fadd float %680, %681 > %683 = fmul float %27, %679 > %684 = fadd float %682, %683 > %685 = fadd float %684, %28 > %686 = fmul float %29, %677 > %687 = fmul float %30, %678 > %688 = fadd float %686, %687 > %689 = fmul float %31, %679 > %690 = fadd float %688, %689 > %691 = fadd float %690, %32 > %692 = fmul float %33, %677 > %693 = fmul float %34, %678 > %694 = fadd float %692, %693 > %695 = fmul float %35, %679 > %696 = fadd float %694, %695 > %697 = fadd float %696, %36 > %698 = fcmp oeq float %697, 0.000000e+00 > %699 = fcmp oeq float %697, 0.000000e+00 > %700 = fcmp ogt float %685, 0.000000e+00 > %701 = select i1 %700, float 1.000000e+00, float %685 > %702 = fcmp oge float %701, 0.000000e+00 > %703 = fcmp ogt float %691, 0.000000e+00 > %704 = select i1 %703, float 1.000000e+00, float %691 > %705 = fcmp oge float %704, 0.000000e+00 > %.op307 = fmul float %701, 0x4600000000000000 > %706 = select i1 %702, float %.op307, float 0xC600000000000000 > %.op308 = fmul float %704, 0x4600000000000000 > %707 = select i1 %705, float %.op308, float 0xC600000000000000 > %708 = fdiv float 1.000000e+00, %697 > %709 = fmul float %685, %708 > %710 = fmul float %691, %708 > %711 = select i1 %698, float %706, float %709 > %712 = select i1 %699, float %707, float %710 > %713 = call float @llvm.fma.f32(float %711, float 5.000000e-01, float 5.000000e-01) > %714 = call float @llvm.fma.f32(float %712, float -5.000000e-01, float 5.000000e-01) > %715 = bitcast float %713 to i32 > %716 = bitcast float %714 to i32 > %717 = insertelement <2 x i32> undef, i32 %715, i32 0 > %718 = insertelement <2 x i32> %717, i32 %716, i32 1 > %719 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %718, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %720 = extractelement <4 x float> %719, i32 0 > %721 = fsub float %679, %720 > %722 = fcmp olt float %721, 0.000000e+00 > %.276 = select i1 %722, float %674, float %673 > %723 = fmul float %.276, 5.000000e-01 > %724 = fcmp ogt float %721, 0.000000e+00 > %725 = and i1 %286, %724 > %temp96.0 = select i1 %725, float %723, float %.276 > %726 = call float @llvm.fabs.f32(float %721) > %727 = fsub float %726, %temp96.0 > %728 = fcmp une float %temp96.0, 0.000000e+00 > br i1 %728, label %IF182, label %ELSE183 > >IF182: ; preds = %ENDIF172 > %729 = fdiv float 1.000000e+00, %temp96.0 > %730 = fmul float %727, %729 > br label %ENDIF181 > >ELSE183: ; preds = %ENDIF172 > %731 = fcmp ogt float %727, 0.000000e+00 > %732 = select i1 %731, float 1.000000e+00, float %727 > %733 = fcmp oge float %732, 0.000000e+00 > %.op309 = fmul float %732, 0x4600000000000000 > %734 = select i1 %733, float %.op309, float 0xC600000000000000 > br label %ENDIF181 > >ENDIF181: ; preds = %ELSE183, %IF182 > %temp92.2 = phi float [ %730, %IF182 ], [ %734, %ELSE183 ] > %735 = call float @llvm.AMDGPU.clamp.(float %temp92.2, float 0.000000e+00, float 1.000000e+00) > %736 = fadd float %660, %735 > %737 = fmul float %178, 0xBFD851EB80000000 > %738 = fmul float %177, -5.000000e-01 > %739 = fadd float %737, %738 > %740 = fmul float %177, 0x3FD851EB80000000 > %741 = fmul float %178, -5.000000e-01 > %742 = fadd float %740, %741 > %743 = call float @llvm.fma.f32(float %207, float %742, float %161) > %744 = call float @llvm.fma.f32(float %209, float %742, float %162) > %745 = call float @llvm.fma.f32(float 
%211, float %742, float %163) > %746 = call float @llvm.fma.f32(float %200, float %739, float %743) > %747 = call float @llvm.fma.f32(float %201, float %739, float %744) > %748 = call float @llvm.fma.f32(float %202, float %739, float %745) > %749 = call float @llvm.fma.f32(float %746, float %675, float %237) > %750 = call float @llvm.fma.f32(float %747, float %675, float %238) > %751 = call float @llvm.fma.f32(float %748, float %675, float %226) > %752 = fmul float %25, %749 > %753 = fmul float %26, %750 > %754 = fadd float %752, %753 > %755 = fmul float %27, %751 > %756 = fadd float %754, %755 > %757 = fadd float %756, %28 > %758 = fmul float %29, %749 > %759 = fmul float %30, %750 > %760 = fadd float %758, %759 > %761 = fmul float %31, %751 > %762 = fadd float %760, %761 > %763 = fadd float %762, %32 > %764 = fmul float %33, %749 > %765 = fmul float %34, %750 > %766 = fadd float %764, %765 > %767 = fmul float %35, %751 > %768 = fadd float %766, %767 > %769 = fadd float %768, %36 > %770 = fcmp oeq float %769, 0.000000e+00 > %771 = fcmp oeq float %769, 0.000000e+00 > %772 = fcmp ogt float %757, 0.000000e+00 > %773 = select i1 %772, float 1.000000e+00, float %757 > %774 = fcmp oge float %773, 0.000000e+00 > %775 = fcmp ogt float %763, 0.000000e+00 > %776 = select i1 %775, float 1.000000e+00, float %763 > %777 = fcmp oge float %776, 0.000000e+00 > %.op310 = fmul float %773, 0x4600000000000000 > %778 = select i1 %774, float %.op310, float 0xC600000000000000 > %.op311 = fmul float %776, 0x4600000000000000 > %779 = select i1 %777, float %.op311, float 0xC600000000000000 > %780 = fdiv float 1.000000e+00, %769 > %781 = fmul float %757, %780 > %782 = fmul float %763, %780 > %783 = select i1 %770, float %778, float %781 > %784 = select i1 %771, float %779, float %782 > %785 = call float @llvm.fma.f32(float %783, float 5.000000e-01, float 5.000000e-01) > %786 = call float @llvm.fma.f32(float %784, float -5.000000e-01, float 5.000000e-01) > %787 = bitcast float %785 to i32 > %788 = bitcast float %786 to i32 > %789 = insertelement <2 x i32> undef, i32 %787, i32 0 > %790 = insertelement <2 x i32> %789, i32 %788, i32 1 > %791 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %790, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %792 = extractelement <4 x float> %791, i32 0 > %793 = fsub float %751, %792 > %794 = fcmp olt float %793, 0.000000e+00 > %.277 = select i1 %794, float %676, float %675 > %795 = fmul float %.277, 5.000000e-01 > %796 = fcmp ogt float %793, 0.000000e+00 > %797 = and i1 %286, %796 > %temp104.0 = select i1 %797, float %795, float %.277 > %798 = call float @llvm.fabs.f32(float %793) > %799 = fsub float %798, %temp104.0 > %800 = fcmp une float %temp104.0, 0.000000e+00 > br i1 %800, label %IF191, label %ELSE192 > >IF191: ; preds = %ENDIF181 > %801 = fdiv float 1.000000e+00, %temp104.0 > %802 = fmul float %799, %801 > br label %ENDIF190 > >ELSE192: ; preds = %ENDIF181 > %803 = fcmp ogt float %799, 0.000000e+00 > %804 = select i1 %803, float 1.000000e+00, float %799 > %805 = fcmp oge float %804, 0.000000e+00 > %.op312 = fmul float %804, 0x4600000000000000 > %806 = select i1 %805, float %.op312, float 0xC600000000000000 > br label %ENDIF190 > >ENDIF190: ; preds = %ELSE192, %IF191 > %temp100.1 = phi float [ %802, %IF191 ], [ %806, %ELSE192 ] > %807 = call float @llvm.AMDGPU.clamp.(float %temp100.1, float 0.000000e+00, float 1.000000e+00) > %808 = fadd float %736, %807 > %809 = fmul float %178, 0x3FE6B851E0000000 > %810 = fmul float %177, 
0x3FD5C28F60000000 > %811 = fadd float %809, %810 > %812 = fmul float %178, 0x3FB99999A0000000 > %813 = fmul float %177, 0xBFC3333340000000 > %814 = fadd float %812, %813 > %815 = fmul float %177, 0xBFE6B851E0000000 > %816 = fmul float %178, 0x3FD5C28F60000000 > %817 = fadd float %815, %816 > %818 = call float @llvm.fma.f32(float %207, float %817, float %161) > %819 = call float @llvm.fma.f32(float %209, float %817, float %162) > %820 = call float @llvm.fma.f32(float %211, float %817, float %163) > %821 = call float @llvm.fma.f32(float %200, float %811, float %818) > %822 = call float @llvm.fma.f32(float %201, float %811, float %819) > %823 = call float @llvm.fma.f32(float %202, float %811, float %820) > %824 = fmul float %229, 8.500000e+00 > %825 = fmul float %229, 4.250000e+00 > %826 = fmul float %229, 1.000000e+01 > %827 = fmul float %229, 5.000000e+00 > %828 = call float @llvm.fma.f32(float %821, float %824, float %237) > %829 = call float @llvm.fma.f32(float %822, float %824, float %238) > %830 = call float @llvm.fma.f32(float %823, float %824, float %226) > %831 = fmul float %25, %828 > %832 = fmul float %26, %829 > %833 = fadd float %831, %832 > %834 = fmul float %27, %830 > %835 = fadd float %833, %834 > %836 = fadd float %835, %28 > %837 = fmul float %29, %828 > %838 = fmul float %30, %829 > %839 = fadd float %837, %838 > %840 = fmul float %31, %830 > %841 = fadd float %839, %840 > %842 = fadd float %841, %32 > %843 = fmul float %33, %828 > %844 = fmul float %34, %829 > %845 = fadd float %843, %844 > %846 = fmul float %35, %830 > %847 = fadd float %845, %846 > %848 = fadd float %847, %36 > %849 = fcmp oeq float %848, 0.000000e+00 > %850 = fcmp oeq float %848, 0.000000e+00 > %851 = fcmp ogt float %836, 0.000000e+00 > %852 = select i1 %851, float 1.000000e+00, float %836 > %853 = fcmp oge float %852, 0.000000e+00 > %854 = fcmp ogt float %842, 0.000000e+00 > %855 = select i1 %854, float 1.000000e+00, float %842 > %856 = fcmp oge float %855, 0.000000e+00 > %.op313 = fmul float %852, 0x4600000000000000 > %857 = select i1 %853, float %.op313, float 0xC600000000000000 > %.op314 = fmul float %855, 0x4600000000000000 > %858 = select i1 %856, float %.op314, float 0xC600000000000000 > %859 = fdiv float 1.000000e+00, %848 > %860 = fmul float %836, %859 > %861 = fmul float %842, %859 > %862 = select i1 %849, float %857, float %860 > %863 = select i1 %850, float %858, float %861 > %864 = call float @llvm.fma.f32(float %862, float 5.000000e-01, float 5.000000e-01) > %865 = call float @llvm.fma.f32(float %863, float -5.000000e-01, float 5.000000e-01) > %866 = bitcast float %864 to i32 > %867 = bitcast float %865 to i32 > %868 = insertelement <2 x i32> undef, i32 %866, i32 0 > %869 = insertelement <2 x i32> %868, i32 %867, i32 1 > %870 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %869, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %871 = extractelement <4 x float> %870, i32 0 > %872 = fsub float %830, %871 > %873 = fcmp ogt float %872, 0.000000e+00 > %874 = and i1 %286, %873 > %875 = fcmp olt float %872, 0.000000e+00 > %.278 = select i1 %875, float %825, float %824 > %876 = fmul float %.278, 5.000000e-01 > %temp108.0 = select i1 %874, float %876, float %.278 > %877 = call float @llvm.fabs.f32(float %872) > %878 = fsub float %877, %temp108.0 > %879 = fcmp une float %temp108.0, 0.000000e+00 > br i1 %879, label %IF200, label %ELSE201 > >IF200: ; preds = %ENDIF190 > %880 = fdiv float 1.000000e+00, %temp108.0 > %881 = fmul float %878, %880 > br 
label %ENDIF199 > >ELSE201: ; preds = %ENDIF190 > %882 = fcmp ogt float %878, 0.000000e+00 > %883 = select i1 %882, float 1.000000e+00, float %878 > %884 = fcmp oge float %883, 0.000000e+00 > %.op315 = fmul float %883, 0x4600000000000000 > %885 = select i1 %884, float %.op315, float 0xC600000000000000 > br label %ENDIF199 > >ENDIF199: ; preds = %ELSE201, %IF200 > %temp108.1 = phi float [ %881, %IF200 ], [ %885, %ELSE201 ] > %886 = call float @llvm.AMDGPU.clamp.(float %temp108.1, float 0.000000e+00, float 1.000000e+00) > %887 = fadd float %808, %886 > %888 = fmul float %177, 0xBFB99999A0000000 > %889 = fmul float %178, 0xBFC3333340000000 > %890 = fadd float %888, %889 > %891 = call float @llvm.fma.f32(float %207, float %890, float %161) > %892 = call float @llvm.fma.f32(float %209, float %890, float %162) > %893 = call float @llvm.fma.f32(float %211, float %890, float %163) > %894 = call float @llvm.fma.f32(float %200, float %814, float %891) > %895 = call float @llvm.fma.f32(float %201, float %814, float %892) > %896 = call float @llvm.fma.f32(float %202, float %814, float %893) > %897 = call float @llvm.fma.f32(float %894, float %826, float %237) > %898 = call float @llvm.fma.f32(float %895, float %826, float %238) > %899 = call float @llvm.fma.f32(float %896, float %826, float %226) > %900 = fmul float %25, %897 > %901 = fmul float %26, %898 > %902 = fadd float %900, %901 > %903 = fmul float %27, %899 > %904 = fadd float %902, %903 > %905 = fadd float %904, %28 > %906 = fmul float %29, %897 > %907 = fmul float %30, %898 > %908 = fadd float %906, %907 > %909 = fmul float %31, %899 > %910 = fadd float %908, %909 > %911 = fadd float %910, %32 > %912 = fmul float %33, %897 > %913 = fmul float %34, %898 > %914 = fadd float %912, %913 > %915 = fmul float %35, %899 > %916 = fadd float %914, %915 > %917 = fadd float %916, %36 > %918 = fcmp oeq float %917, 0.000000e+00 > %919 = fcmp oeq float %917, 0.000000e+00 > %920 = fcmp ogt float %905, 0.000000e+00 > %921 = select i1 %920, float 1.000000e+00, float %905 > %922 = fcmp oge float %921, 0.000000e+00 > %923 = fcmp ogt float %911, 0.000000e+00 > %924 = select i1 %923, float 1.000000e+00, float %911 > %925 = fcmp oge float %924, 0.000000e+00 > %.op316 = fmul float %921, 0x4600000000000000 > %926 = select i1 %922, float %.op316, float 0xC600000000000000 > %.op317 = fmul float %924, 0x4600000000000000 > %927 = select i1 %925, float %.op317, float 0xC600000000000000 > %928 = fdiv float 1.000000e+00, %917 > %929 = fmul float %905, %928 > %930 = fmul float %911, %928 > %931 = select i1 %918, float %926, float %929 > %932 = select i1 %919, float %927, float %930 > %933 = call float @llvm.fma.f32(float %931, float 5.000000e-01, float 5.000000e-01) > %934 = call float @llvm.fma.f32(float %932, float -5.000000e-01, float 5.000000e-01) > %935 = bitcast float %933 to i32 > %936 = bitcast float %934 to i32 > %937 = insertelement <2 x i32> undef, i32 %935, i32 0 > %938 = insertelement <2 x i32> %937, i32 %936, i32 1 > %939 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %938, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %940 = extractelement <4 x float> %939, i32 0 > %941 = fsub float %899, %940 > %942 = fcmp ogt float %941, 0.000000e+00 > %943 = and i1 %286, %942 > %944 = fcmp olt float %941, 0.000000e+00 > %.279 = select i1 %944, float %827, float %826 > %945 = fmul float %.279, 5.000000e-01 > %temp56.0 = select i1 %943, float %945, float %.279 > %946 = call float @llvm.fabs.f32(float %941) > %947 = fsub 
float %946, %temp56.0 > %948 = fcmp une float %temp56.0, 0.000000e+00 > br i1 %948, label %IF209, label %ELSE210 > >IF209: ; preds = %ENDIF199 > %949 = fdiv float 1.000000e+00, %temp56.0 > %950 = fmul float %947, %949 > br label %ENDIF208 > >ELSE210: ; preds = %ENDIF199 > %951 = fcmp ogt float %947, 0.000000e+00 > %952 = select i1 %951, float 1.000000e+00, float %947 > %953 = fcmp oge float %952, 0.000000e+00 > %.op318 = fmul float %952, 0x4600000000000000 > %954 = select i1 %953, float %.op318, float 0xC600000000000000 > br label %ENDIF208 > >ENDIF208: ; preds = %ELSE210, %IF209 > %temp56.1 = phi float [ %950, %IF209 ], [ %954, %ELSE210 ] > %955 = call float @llvm.AMDGPU.clamp.(float %temp56.1, float 0.000000e+00, float 1.000000e+00) > %956 = fadd float %955, %887 > %957 = fmul float %956, 0x3FB99999A0000000 > %958 = fmul float %957, %957 > %959 = fmul float %177, 0xBFB99999A0000000 > %960 = fmul float %178, 0x3FB99999A0000000 > %961 = fadd float %959, %960 > %962 = fmul float %177, 0x3FD147AE20000000 > %963 = fmul float %178, 0x3F847AE140000000 > %964 = fadd float %962, %963 > %965 = fmul float %139, %201 > %966 = fmul float %137, %202 > %967 = fmul float %138, %200 > %968 = fsub float -0.000000e+00, %965 > %969 = call float @llvm.fma.f32(float %138, float %202, float %968) > %970 = fsub float -0.000000e+00, %966 > %971 = call float @llvm.fma.f32(float %139, float %200, float %970) > %972 = fsub float -0.000000e+00, %967 > %973 = call float @llvm.fma.f32(float %137, float %201, float %972) > %974 = call float @llvm.fma.f32(float %969, float %961, float %137) > %975 = call float @llvm.fma.f32(float %971, float %961, float %138) > %976 = call float @llvm.fma.f32(float %973, float %961, float %139) > %977 = call float @llvm.fma.f32(float %200, float %217, float %974) > %978 = call float @llvm.fma.f32(float %201, float %217, float %975) > %979 = call float @llvm.fma.f32(float %202, float %217, float %976) > %980 = fmul float %230, 5.000000e-01 > %981 = fmul float %230, 2.500000e-01 > %982 = fmul float %230, 0x3FF4CCCCC0000000 > %983 = fmul float %230, 0x3FE4CCCCC0000000 > %984 = call float @llvm.fma.f32(float %977, float %980, float %237) > %985 = call float @llvm.fma.f32(float %978, float %980, float %238) > %986 = call float @llvm.fma.f32(float %979, float %980, float %226) > %987 = fmul float %25, %984 > %988 = fmul float %26, %985 > %989 = fadd float %987, %988 > %990 = fmul float %27, %986 > %991 = fadd float %989, %990 > %992 = fadd float %991, %28 > %993 = fmul float %29, %984 > %994 = fmul float %30, %985 > %995 = fadd float %993, %994 > %996 = fmul float %31, %986 > %997 = fadd float %995, %996 > %998 = fadd float %997, %32 > %999 = fmul float %33, %984 > %1000 = fmul float %34, %985 > %1001 = fadd float %999, %1000 > %1002 = fmul float %35, %986 > %1003 = fadd float %1001, %1002 > %1004 = fadd float %1003, %36 > %1005 = fcmp oeq float %1004, 0.000000e+00 > %1006 = fcmp oeq float %1004, 0.000000e+00 > %1007 = fcmp ogt float %992, 0.000000e+00 > %1008 = select i1 %1007, float 1.000000e+00, float %992 > %1009 = fcmp oge float %1008, 0.000000e+00 > %1010 = fcmp ogt float %998, 0.000000e+00 > %1011 = select i1 %1010, float 1.000000e+00, float %998 > %1012 = fcmp oge float %1011, 0.000000e+00 > %.op319 = fmul float %1008, 0x4600000000000000 > %1013 = select i1 %1009, float %.op319, float 0xC600000000000000 > %.op320 = fmul float %1011, 0x4600000000000000 > %1014 = select i1 %1012, float %.op320, float 0xC600000000000000 > %1015 = fdiv float 1.000000e+00, %1004 > %1016 = fmul float %992, 
%1015 > %1017 = fmul float %998, %1015 > %1018 = select i1 %1005, float %1013, float %1016 > %1019 = select i1 %1006, float %1014, float %1017 > %1020 = call float @llvm.fma.f32(float %1018, float 5.000000e-01, float 5.000000e-01) > %1021 = call float @llvm.fma.f32(float %1019, float -5.000000e-01, float 5.000000e-01) > %1022 = bitcast float %1020 to i32 > %1023 = bitcast float %1021 to i32 > %1024 = insertelement <2 x i32> undef, i32 %1022, i32 0 > %1025 = insertelement <2 x i32> %1024, i32 %1023, i32 1 > %1026 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1025, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1027 = extractelement <4 x float> %1026, i32 0 > %1028 = fsub float %986, %1027 > %1029 = fcmp olt float %1028, 0.000000e+00 > %.280 = select i1 %1029, float %981, float %980 > %1030 = call float @llvm.fabs.f32(float %1028) > %1031 = fsub float %1030, %.280 > %1032 = fcmp une float %.280, 0.000000e+00 > br i1 %1032, label %IF215, label %ELSE216 > >IF215: ; preds = %ENDIF208 > %1033 = fdiv float 1.000000e+00, %.280 > %1034 = fmul float %1031, %1033 > br label %ENDIF214 > >ELSE216: ; preds = %ENDIF208 > %1035 = fcmp ogt float %1031, 0.000000e+00 > %1036 = select i1 %1035, float 1.000000e+00, float %1031 > %1037 = fcmp oge float %1036, 0.000000e+00 > %.op321 = fmul float %1036, 0x4600000000000000 > %1038 = select i1 %1037, float %.op321, float 0xC600000000000000 > br label %ENDIF214 > >ENDIF214: ; preds = %ELSE216, %IF215 > %temp56.3 = phi float [ %1034, %IF215 ], [ %1038, %ELSE216 ] > %1039 = call float @llvm.AMDGPU.clamp.(float %temp56.3, float 0.000000e+00, float 1.000000e+00) > %1040 = call float @llvm.fma.f32(float %969, float %300, float %137) > %1041 = call float @llvm.fma.f32(float %971, float %300, float %138) > %1042 = call float @llvm.fma.f32(float %973, float %300, float %139) > %1043 = call float @llvm.fma.f32(float %200, float %301, float %1040) > %1044 = call float @llvm.fma.f32(float %201, float %301, float %1041) > %1045 = call float @llvm.fma.f32(float %202, float %301, float %1042) > %1046 = call float @llvm.fma.f32(float %1043, float %982, float %237) > %1047 = call float @llvm.fma.f32(float %1044, float %982, float %238) > %1048 = call float @llvm.fma.f32(float %1045, float %982, float %226) > %1049 = fmul float %25, %1046 > %1050 = fmul float %26, %1047 > %1051 = fadd float %1049, %1050 > %1052 = fmul float %27, %1048 > %1053 = fadd float %1051, %1052 > %1054 = fadd float %1053, %28 > %1055 = fmul float %29, %1046 > %1056 = fmul float %30, %1047 > %1057 = fadd float %1055, %1056 > %1058 = fmul float %31, %1048 > %1059 = fadd float %1057, %1058 > %1060 = fadd float %1059, %32 > %1061 = fmul float %33, %1046 > %1062 = fmul float %34, %1047 > %1063 = fadd float %1061, %1062 > %1064 = fmul float %35, %1048 > %1065 = fadd float %1063, %1064 > %1066 = fadd float %1065, %36 > %1067 = fcmp oeq float %1066, 0.000000e+00 > %1068 = fcmp oeq float %1066, 0.000000e+00 > %1069 = fcmp ogt float %1054, 0.000000e+00 > %1070 = select i1 %1069, float 1.000000e+00, float %1054 > %1071 = fcmp oge float %1070, 0.000000e+00 > %1072 = fcmp ogt float %1060, 0.000000e+00 > %1073 = select i1 %1072, float 1.000000e+00, float %1060 > %1074 = fcmp oge float %1073, 0.000000e+00 > %.op322 = fmul float %1070, 0x4600000000000000 > %1075 = select i1 %1071, float %.op322, float 0xC600000000000000 > %.op323 = fmul float %1073, 0x4600000000000000 > %1076 = select i1 %1074, float %.op323, float 0xC600000000000000 > %1077 = fdiv float 1.000000e+00, 
%1066 > %1078 = fmul float %1054, %1077 > %1079 = fmul float %1060, %1077 > %1080 = select i1 %1067, float %1075, float %1078 > %1081 = select i1 %1068, float %1076, float %1079 > %1082 = call float @llvm.fma.f32(float %1080, float 5.000000e-01, float 5.000000e-01) > %1083 = call float @llvm.fma.f32(float %1081, float -5.000000e-01, float 5.000000e-01) > %1084 = bitcast float %1082 to i32 > %1085 = bitcast float %1083 to i32 > %1086 = insertelement <2 x i32> undef, i32 %1084, i32 0 > %1087 = insertelement <2 x i32> %1086, i32 %1085, i32 1 > %1088 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1087, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1089 = extractelement <4 x float> %1088, i32 0 > %1090 = fsub float %1048, %1089 > %1091 = fcmp olt float %1090, 0.000000e+00 > %.281 = select i1 %1091, float %983, float %982 > %1092 = call float @llvm.fabs.f32(float %1090) > %1093 = fsub float %1092, %.281 > %1094 = fcmp une float %.281, 0.000000e+00 > br i1 %1094, label %IF221, label %ELSE222 > >IF221: ; preds = %ENDIF214 > %1095 = fdiv float 1.000000e+00, %.281 > %1096 = fmul float %1093, %1095 > br label %ENDIF220 > >ELSE222: ; preds = %ENDIF214 > %1097 = fcmp ogt float %1093, 0.000000e+00 > %1098 = select i1 %1097, float 1.000000e+00, float %1093 > %1099 = fcmp oge float %1098, 0.000000e+00 > %.op324 = fmul float %1098, 0x4600000000000000 > %1100 = select i1 %1099, float %.op324, float 0xC600000000000000 > br label %ENDIF220 > >ENDIF220: ; preds = %ELSE222, %IF221 > %temp68.4 = phi float [ %1096, %IF221 ], [ %1100, %ELSE222 ] > %1101 = call float @llvm.AMDGPU.clamp.(float %temp68.4, float 0.000000e+00, float 1.000000e+00) > %1102 = fadd float %1039, %1101 > %1103 = call float @llvm.fma.f32(float %969, float %964, float %137) > %1104 = call float @llvm.fma.f32(float %971, float %964, float %138) > %1105 = call float @llvm.fma.f32(float %973, float %964, float %139) > %1106 = call float @llvm.fma.f32(float %200, float %370, float %1103) > %1107 = call float @llvm.fma.f32(float %201, float %370, float %1104) > %1108 = call float @llvm.fma.f32(float %202, float %370, float %1105) > %1109 = fmul float %230, 0x4002666660000000 > %1110 = fmul float %230, 0x3FF2666660000000 > %1111 = fmul float %230, 0x40099999A0000000 > %1112 = fmul float %230, 0x3FF99999A0000000 > %1113 = call float @llvm.fma.f32(float %1106, float %1109, float %237) > %1114 = call float @llvm.fma.f32(float %1107, float %1109, float %238) > %1115 = call float @llvm.fma.f32(float %1108, float %1109, float %226) > %1116 = fmul float %25, %1113 > %1117 = fmul float %26, %1114 > %1118 = fadd float %1116, %1117 > %1119 = fmul float %27, %1115 > %1120 = fadd float %1118, %1119 > %1121 = fadd float %1120, %28 > %1122 = fmul float %29, %1113 > %1123 = fmul float %30, %1114 > %1124 = fadd float %1122, %1123 > %1125 = fmul float %31, %1115 > %1126 = fadd float %1124, %1125 > %1127 = fadd float %1126, %32 > %1128 = fmul float %33, %1113 > %1129 = fmul float %34, %1114 > %1130 = fadd float %1128, %1129 > %1131 = fmul float %35, %1115 > %1132 = fadd float %1130, %1131 > %1133 = fadd float %1132, %36 > %1134 = fcmp oeq float %1133, 0.000000e+00 > %1135 = fcmp oeq float %1133, 0.000000e+00 > %1136 = fcmp ogt float %1121, 0.000000e+00 > %1137 = select i1 %1136, float 1.000000e+00, float %1121 > %1138 = fcmp oge float %1137, 0.000000e+00 > %1139 = fcmp ogt float %1127, 0.000000e+00 > %1140 = select i1 %1139, float 1.000000e+00, float %1127 > %1141 = fcmp oge float %1140, 0.000000e+00 > %.op325 = 
fmul float %1137, 0x4600000000000000 > %1142 = select i1 %1138, float %.op325, float 0xC600000000000000 > %.op326 = fmul float %1140, 0x4600000000000000 > %1143 = select i1 %1141, float %.op326, float 0xC600000000000000 > %1144 = fdiv float 1.000000e+00, %1133 > %1145 = fmul float %1121, %1144 > %1146 = fmul float %1127, %1144 > %1147 = select i1 %1134, float %1142, float %1145 > %1148 = select i1 %1135, float %1143, float %1146 > %1149 = call float @llvm.fma.f32(float %1147, float 5.000000e-01, float 5.000000e-01) > %1150 = call float @llvm.fma.f32(float %1148, float -5.000000e-01, float 5.000000e-01) > %1151 = bitcast float %1149 to i32 > %1152 = bitcast float %1150 to i32 > %1153 = insertelement <2 x i32> undef, i32 %1151, i32 0 > %1154 = insertelement <2 x i32> %1153, i32 %1152, i32 1 > %1155 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1154, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1156 = extractelement <4 x float> %1155, i32 0 > %1157 = fsub float %1115, %1156 > %1158 = fcmp olt float %1157, 0.000000e+00 > %.282 = select i1 %1158, float %1110, float %1109 > %1159 = call float @llvm.fabs.f32(float %1157) > %1160 = fsub float %1159, %.282 > %1161 = fcmp une float %.282, 0.000000e+00 > br i1 %1161, label %IF227, label %ELSE228 > >IF227: ; preds = %ENDIF220 > %1162 = fdiv float 1.000000e+00, %.282 > %1163 = fmul float %1160, %1162 > br label %ENDIF226 > >ELSE228: ; preds = %ENDIF220 > %1164 = fcmp ogt float %1160, 0.000000e+00 > %1165 = select i1 %1164, float 1.000000e+00, float %1160 > %1166 = fcmp oge float %1165, 0.000000e+00 > %.op327 = fmul float %1165, 0x4600000000000000 > %1167 = select i1 %1166, float %.op327, float 0xC600000000000000 > br label %ENDIF226 > >ENDIF226: ; preds = %ELSE228, %IF227 > %temp56.5 = phi float [ %1163, %IF227 ], [ %1167, %ELSE228 ] > %1168 = call float @llvm.AMDGPU.clamp.(float %temp56.5, float 0.000000e+00, float 1.000000e+00) > %1169 = fadd float %1168, %1102 > %1170 = call float @llvm.fma.f32(float %969, float %446, float %137) > %1171 = call float @llvm.fma.f32(float %971, float %446, float %138) > %1172 = call float @llvm.fma.f32(float %973, float %446, float %139) > %1173 = call float @llvm.fma.f32(float %200, float %443, float %1170) > %1174 = call float @llvm.fma.f32(float %201, float %443, float %1171) > %1175 = call float @llvm.fma.f32(float %202, float %443, float %1172) > %1176 = call float @llvm.fma.f32(float %1173, float %1111, float %237) > %1177 = call float @llvm.fma.f32(float %1174, float %1111, float %238) > %1178 = call float @llvm.fma.f32(float %1175, float %1111, float %226) > %1179 = fmul float %25, %1176 > %1180 = fmul float %26, %1177 > %1181 = fadd float %1179, %1180 > %1182 = fmul float %27, %1178 > %1183 = fadd float %1181, %1182 > %1184 = fadd float %1183, %28 > %1185 = fmul float %29, %1176 > %1186 = fmul float %30, %1177 > %1187 = fadd float %1185, %1186 > %1188 = fmul float %31, %1178 > %1189 = fadd float %1187, %1188 > %1190 = fadd float %1189, %32 > %1191 = fmul float %33, %1176 > %1192 = fmul float %34, %1177 > %1193 = fadd float %1191, %1192 > %1194 = fmul float %35, %1178 > %1195 = fadd float %1193, %1194 > %1196 = fadd float %1195, %36 > %1197 = fcmp oeq float %1196, 0.000000e+00 > %1198 = fcmp oeq float %1196, 0.000000e+00 > %1199 = fcmp ogt float %1184, 0.000000e+00 > %1200 = select i1 %1199, float 1.000000e+00, float %1184 > %1201 = fcmp oge float %1200, 0.000000e+00 > %1202 = fcmp ogt float %1190, 0.000000e+00 > %1203 = select i1 %1202, float 
1.000000e+00, float %1190 > %1204 = fcmp oge float %1203, 0.000000e+00 > %.op328 = fmul float %1200, 0x4600000000000000 > %1205 = select i1 %1201, float %.op328, float 0xC600000000000000 > %.op329 = fmul float %1203, 0x4600000000000000 > %1206 = select i1 %1204, float %.op329, float 0xC600000000000000 > %1207 = fdiv float 1.000000e+00, %1196 > %1208 = fmul float %1184, %1207 > %1209 = fmul float %1190, %1207 > %1210 = select i1 %1197, float %1205, float %1208 > %1211 = select i1 %1198, float %1206, float %1209 > %1212 = call float @llvm.fma.f32(float %1210, float 5.000000e-01, float 5.000000e-01) > %1213 = call float @llvm.fma.f32(float %1211, float -5.000000e-01, float 5.000000e-01) > %1214 = bitcast float %1212 to i32 > %1215 = bitcast float %1213 to i32 > %1216 = insertelement <2 x i32> undef, i32 %1214, i32 0 > %1217 = insertelement <2 x i32> %1216, i32 %1215, i32 1 > %1218 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1217, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1219 = extractelement <4 x float> %1218, i32 0 > %1220 = fsub float %1178, %1219 > %1221 = fcmp olt float %1220, 0.000000e+00 > %.283 = select i1 %1221, float %1112, float %1111 > %1222 = call float @llvm.fabs.f32(float %1220) > %1223 = fsub float %1222, %.283 > %1224 = fcmp une float %.283, 0.000000e+00 > br i1 %1224, label %IF233, label %ELSE234 > >IF233: ; preds = %ENDIF226 > %1225 = fdiv float 1.000000e+00, %.283 > %1226 = fmul float %1223, %1225 > br label %ENDIF232 > >ELSE234: ; preds = %ENDIF226 > %1227 = fcmp ogt float %1223, 0.000000e+00 > %1228 = select i1 %1227, float 1.000000e+00, float %1223 > %1229 = fcmp oge float %1228, 0.000000e+00 > %.op330 = fmul float %1228, 0x4600000000000000 > %1230 = select i1 %1229, float %.op330, float 0xC600000000000000 > br label %ENDIF232 > >ENDIF232: ; preds = %ELSE234, %IF233 > %temp48.3 = phi float [ %1226, %IF233 ], [ %1230, %ELSE234 ] > %1231 = call float @llvm.AMDGPU.clamp.(float %temp48.3, float 0.000000e+00, float 1.000000e+00) > %1232 = fadd float %1231, %1169 > %1233 = call float @llvm.fma.f32(float %969, float %518, float %137) > %1234 = call float @llvm.fma.f32(float %971, float %518, float %138) > %1235 = call float @llvm.fma.f32(float %973, float %518, float %139) > %1236 = call float @llvm.fma.f32(float %200, float %515, float %1233) > %1237 = call float @llvm.fma.f32(float %201, float %515, float %1234) > %1238 = call float @llvm.fma.f32(float %202, float %515, float %1235) > %1239 = fmul float %230, 0x4010666660000000 > %1240 = fmul float %230, 0x4000666660000000 > %1241 = fmul float %230, 0x4015333340000000 > %1242 = fmul float %230, 0x4005333340000000 > %1243 = call float @llvm.fma.f32(float %1236, float %1239, float %237) > %1244 = call float @llvm.fma.f32(float %1237, float %1239, float %238) > %1245 = call float @llvm.fma.f32(float %1238, float %1239, float %226) > %1246 = fmul float %25, %1243 > %1247 = fmul float %26, %1244 > %1248 = fadd float %1246, %1247 > %1249 = fmul float %27, %1245 > %1250 = fadd float %1248, %1249 > %1251 = fadd float %1250, %28 > %1252 = fmul float %29, %1243 > %1253 = fmul float %30, %1244 > %1254 = fadd float %1252, %1253 > %1255 = fmul float %31, %1245 > %1256 = fadd float %1254, %1255 > %1257 = fadd float %1256, %32 > %1258 = fmul float %33, %1243 > %1259 = fmul float %34, %1244 > %1260 = fadd float %1258, %1259 > %1261 = fmul float %35, %1245 > %1262 = fadd float %1260, %1261 > %1263 = fadd float %1262, %36 > %1264 = fcmp oeq float %1263, 0.000000e+00 > %1265 = fcmp 
oeq float %1263, 0.000000e+00 > %1266 = fcmp ogt float %1251, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1251 > %1268 = fcmp oge float %1267, 0.000000e+00 > %1269 = fcmp ogt float %1257, 0.000000e+00 > %1270 = select i1 %1269, float 1.000000e+00, float %1257 > %1271 = fcmp oge float %1270, 0.000000e+00 > %.op331 = fmul float %1267, 0x4600000000000000 > %1272 = select i1 %1268, float %.op331, float 0xC600000000000000 > %.op332 = fmul float %1270, 0x4600000000000000 > %1273 = select i1 %1271, float %.op332, float 0xC600000000000000 > %1274 = fdiv float 1.000000e+00, %1263 > %1275 = fmul float %1251, %1274 > %1276 = fmul float %1257, %1274 > %1277 = select i1 %1264, float %1272, float %1275 > %1278 = select i1 %1265, float %1273, float %1276 > %1279 = call float @llvm.fma.f32(float %1277, float 5.000000e-01, float 5.000000e-01) > %1280 = call float @llvm.fma.f32(float %1278, float -5.000000e-01, float 5.000000e-01) > %1281 = bitcast float %1279 to i32 > %1282 = bitcast float %1280 to i32 > %1283 = insertelement <2 x i32> undef, i32 %1281, i32 0 > %1284 = insertelement <2 x i32> %1283, i32 %1282, i32 1 > %1285 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1284, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1286 = extractelement <4 x float> %1285, i32 0 > %1287 = fsub float %1245, %1286 > %1288 = fcmp olt float %1287, 0.000000e+00 > %.284 = select i1 %1288, float %1240, float %1239 > %1289 = call float @llvm.fabs.f32(float %1287) > %1290 = fsub float %1289, %.284 > %1291 = fcmp une float %.284, 0.000000e+00 > br i1 %1291, label %IF239, label %ELSE240 > >IF239: ; preds = %ENDIF232 > %1292 = fdiv float 1.000000e+00, %.284 > %1293 = fmul float %1290, %1292 > br label %ENDIF238 > >ELSE240: ; preds = %ENDIF232 > %1294 = fcmp ogt float %1290, 0.000000e+00 > %1295 = select i1 %1294, float 1.000000e+00, float %1290 > %1296 = fcmp oge float %1295, 0.000000e+00 > %.op333 = fmul float %1295, 0x4600000000000000 > %1297 = select i1 %1296, float %.op333, float 0xC600000000000000 > br label %ENDIF238 > >ENDIF238: ; preds = %ELSE240, %IF239 > %temp48.5 = phi float [ %1293, %IF239 ], [ %1297, %ELSE240 ] > %1298 = call float @llvm.AMDGPU.clamp.(float %temp48.5, float 0.000000e+00, float 1.000000e+00) > %1299 = fadd float %1298, %1232 > %1300 = call float @llvm.fma.f32(float %969, float %594, float %137) > %1301 = call float @llvm.fma.f32(float %971, float %594, float %138) > %1302 = call float @llvm.fma.f32(float %973, float %594, float %139) > %1303 = call float @llvm.fma.f32(float %200, float %591, float %1300) > %1304 = call float @llvm.fma.f32(float %201, float %591, float %1301) > %1305 = call float @llvm.fma.f32(float %202, float %591, float %1302) > %1306 = call float @llvm.fma.f32(float %1303, float %1241, float %237) > %1307 = call float @llvm.fma.f32(float %1304, float %1241, float %238) > %1308 = call float @llvm.fma.f32(float %1305, float %1241, float %226) > %1309 = fmul float %25, %1306 > %1310 = fmul float %26, %1307 > %1311 = fadd float %1309, %1310 > %1312 = fmul float %27, %1308 > %1313 = fadd float %1311, %1312 > %1314 = fadd float %1313, %28 > %1315 = fmul float %29, %1306 > %1316 = fmul float %30, %1307 > %1317 = fadd float %1315, %1316 > %1318 = fmul float %31, %1308 > %1319 = fadd float %1317, %1318 > %1320 = fadd float %1319, %32 > %1321 = fmul float %33, %1306 > %1322 = fmul float %34, %1307 > %1323 = fadd float %1321, %1322 > %1324 = fmul float %35, %1308 > %1325 = fadd float %1323, %1324 > %1326 = fadd 
float %1325, %36 > %1327 = fcmp oeq float %1326, 0.000000e+00 > %1328 = fcmp oeq float %1326, 0.000000e+00 > %1329 = fcmp ogt float %1314, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1314 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1320, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1320 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op334 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op334, float 0xC600000000000000 > %.op335 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op335, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1326 > %1338 = fmul float %1314, %1337 > %1339 = fmul float %1320, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = call float @llvm.fma.f32(float %1340, float 5.000000e-01, float 5.000000e-01) > %1343 = call float @llvm.fma.f32(float %1341, float -5.000000e-01, float 5.000000e-01) > %1344 = bitcast float %1342 to i32 > %1345 = bitcast float %1343 to i32 > %1346 = insertelement <2 x i32> undef, i32 %1344, i32 0 > %1347 = insertelement <2 x i32> %1346, i32 %1345, i32 1 > %1348 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1347, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1349 = extractelement <4 x float> %1348, i32 0 > %1350 = fsub float %1308, %1349 > %1351 = fcmp olt float %1350, 0.000000e+00 > %.285 = select i1 %1351, float %1242, float %1241 > %1352 = call float @llvm.fabs.f32(float %1350) > %1353 = fsub float %1352, %.285 > %1354 = fcmp une float %.285, 0.000000e+00 > br i1 %1354, label %IF245, label %ELSE246 > >IF245: ; preds = %ENDIF238 > %1355 = fdiv float 1.000000e+00, %.285 > %1356 = fmul float %1353, %1355 > br label %ENDIF244 > >ELSE246: ; preds = %ENDIF238 > %1357 = fcmp ogt float %1353, 0.000000e+00 > %1358 = select i1 %1357, float 1.000000e+00, float %1353 > %1359 = fcmp oge float %1358, 0.000000e+00 > %.op336 = fmul float %1358, 0x4600000000000000 > %1360 = select i1 %1359, float %.op336, float 0xC600000000000000 > br label %ENDIF244 > >ENDIF244: ; preds = %ELSE246, %IF245 > %temp48.7 = phi float [ %1356, %IF245 ], [ %1360, %ELSE246 ] > %1361 = call float @llvm.AMDGPU.clamp.(float %temp48.7, float 0.000000e+00, float 1.000000e+00) > %1362 = fadd float %1361, %1299 > %1363 = call float @llvm.fma.f32(float %969, float %666, float %137) > %1364 = call float @llvm.fma.f32(float %971, float %666, float %138) > %1365 = call float @llvm.fma.f32(float %973, float %666, float %139) > %1366 = call float @llvm.fma.f32(float %200, float %663, float %1363) > %1367 = call float @llvm.fma.f32(float %201, float %663, float %1364) > %1368 = call float @llvm.fma.f32(float %202, float %663, float %1365) > %1369 = fmul float %230, 0x4018666660000000 > %1370 = fmul float %230, 0x4008666660000000 > %1371 = fmul float %230, 0x401ECCCCC0000000 > %1372 = fmul float %230, 0x400ECCCCC0000000 > %1373 = fmul float %230, 8.500000e+00 > %1374 = fmul float %230, 4.250000e+00 > %1375 = fmul float %230, 1.000000e+01 > %1376 = fmul float %230, 5.000000e+00 > %1377 = call float @llvm.fma.f32(float %1366, float %1369, float %237) > %1378 = call float @llvm.fma.f32(float %1367, float %1369, float %238) > %1379 = call float @llvm.fma.f32(float %1368, float %1369, float %226) > %1380 = fmul float %25, %1377 > %1381 = fmul float %26, %1378 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %27, %1379 > %1384 = fadd float 
%1382, %1383 > %1385 = fadd float %1384, %28 > %1386 = fmul float %29, %1377 > %1387 = fmul float %30, %1378 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %31, %1379 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %32 > %1392 = fmul float %33, %1377 > %1393 = fmul float %34, %1378 > %1394 = fadd float %1392, %1393 > %1395 = fmul float %35, %1379 > %1396 = fadd float %1394, %1395 > %1397 = fadd float %1396, %36 > %1398 = fcmp oeq float %1397, 0.000000e+00 > %1399 = fcmp oeq float %1397, 0.000000e+00 > %1400 = fcmp ogt float %1385, 0.000000e+00 > %1401 = select i1 %1400, float 1.000000e+00, float %1385 > %1402 = fcmp oge float %1401, 0.000000e+00 > %1403 = fcmp ogt float %1391, 0.000000e+00 > %1404 = select i1 %1403, float 1.000000e+00, float %1391 > %1405 = fcmp oge float %1404, 0.000000e+00 > %.op337 = fmul float %1401, 0x4600000000000000 > %1406 = select i1 %1402, float %.op337, float 0xC600000000000000 > %.op338 = fmul float %1404, 0x4600000000000000 > %1407 = select i1 %1405, float %.op338, float 0xC600000000000000 > %1408 = fdiv float 1.000000e+00, %1397 > %1409 = fmul float %1385, %1408 > %1410 = fmul float %1391, %1408 > %1411 = select i1 %1398, float %1406, float %1409 > %1412 = select i1 %1399, float %1407, float %1410 > %1413 = call float @llvm.fma.f32(float %1411, float 5.000000e-01, float 5.000000e-01) > %1414 = call float @llvm.fma.f32(float %1412, float -5.000000e-01, float 5.000000e-01) > %1415 = bitcast float %1413 to i32 > %1416 = bitcast float %1414 to i32 > %1417 = insertelement <2 x i32> undef, i32 %1415, i32 0 > %1418 = insertelement <2 x i32> %1417, i32 %1416, i32 1 > %1419 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1418, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1420 = extractelement <4 x float> %1419, i32 0 > %1421 = fsub float %1379, %1420 > %1422 = fcmp olt float %1421, 0.000000e+00 > %.286 = select i1 %1422, float %1370, float %1369 > %1423 = call float @llvm.fabs.f32(float %1421) > %1424 = fsub float %1423, %.286 > %1425 = fcmp une float %.286, 0.000000e+00 > br i1 %1425, label %IF251, label %ELSE252 > >IF251: ; preds = %ENDIF244 > %1426 = fdiv float 1.000000e+00, %.286 > %1427 = fmul float %1424, %1426 > br label %ENDIF250 > >ELSE252: ; preds = %ENDIF244 > %1428 = fcmp ogt float %1424, 0.000000e+00 > %1429 = select i1 %1428, float 1.000000e+00, float %1424 > %1430 = fcmp oge float %1429, 0.000000e+00 > %.op339 = fmul float %1429, 0x4600000000000000 > %1431 = select i1 %1430, float %.op339, float 0xC600000000000000 > br label %ENDIF250 > >ENDIF250: ; preds = %ELSE252, %IF251 > %temp36.1 = phi float [ %1427, %IF251 ], [ %1431, %ELSE252 ] > %1432 = call float @llvm.AMDGPU.clamp.(float %temp36.1, float 0.000000e+00, float 1.000000e+00) > %1433 = fadd float %1432, %1362 > %1434 = call float @llvm.fma.f32(float %969, float %742, float %137) > %1435 = call float @llvm.fma.f32(float %971, float %742, float %138) > %1436 = call float @llvm.fma.f32(float %973, float %742, float %139) > %1437 = call float @llvm.fma.f32(float %200, float %739, float %1434) > %1438 = call float @llvm.fma.f32(float %201, float %739, float %1435) > %1439 = call float @llvm.fma.f32(float %202, float %739, float %1436) > %1440 = call float @llvm.fma.f32(float %1437, float %1371, float %237) > %1441 = call float @llvm.fma.f32(float %1438, float %1371, float %238) > %1442 = call float @llvm.fma.f32(float %1439, float %1371, float %226) > %1443 = fmul float %25, %1440 > %1444 = fmul float %26, %1441 > %1445 = fadd 
float %1443, %1444 > %1446 = fmul float %27, %1442 > %1447 = fadd float %1445, %1446 > %1448 = fadd float %1447, %28 > %1449 = fmul float %29, %1440 > %1450 = fmul float %30, %1441 > %1451 = fadd float %1449, %1450 > %1452 = fmul float %31, %1442 > %1453 = fadd float %1451, %1452 > %1454 = fadd float %1453, %32 > %1455 = fmul float %33, %1440 > %1456 = fmul float %34, %1441 > %1457 = fadd float %1455, %1456 > %1458 = fmul float %35, %1442 > %1459 = fadd float %1457, %1458 > %1460 = fadd float %1459, %36 > %1461 = fcmp oeq float %1460, 0.000000e+00 > %1462 = fcmp oeq float %1460, 0.000000e+00 > %1463 = fcmp ogt float %1448, 0.000000e+00 > %1464 = select i1 %1463, float 1.000000e+00, float %1448 > %1465 = fcmp oge float %1464, 0.000000e+00 > %1466 = fcmp ogt float %1454, 0.000000e+00 > %1467 = select i1 %1466, float 1.000000e+00, float %1454 > %1468 = fcmp oge float %1467, 0.000000e+00 > %.op340 = fmul float %1464, 0x4600000000000000 > %1469 = select i1 %1465, float %.op340, float 0xC600000000000000 > %.op341 = fmul float %1467, 0x4600000000000000 > %1470 = select i1 %1468, float %.op341, float 0xC600000000000000 > %1471 = fdiv float 1.000000e+00, %1460 > %1472 = fmul float %1448, %1471 > %1473 = fmul float %1454, %1471 > %1474 = select i1 %1461, float %1469, float %1472 > %1475 = select i1 %1462, float %1470, float %1473 > %1476 = call float @llvm.fma.f32(float %1474, float 5.000000e-01, float 5.000000e-01) > %1477 = call float @llvm.fma.f32(float %1475, float -5.000000e-01, float 5.000000e-01) > %1478 = bitcast float %1476 to i32 > %1479 = bitcast float %1477 to i32 > %1480 = insertelement <2 x i32> undef, i32 %1478, i32 0 > %1481 = insertelement <2 x i32> %1480, i32 %1479, i32 1 > %1482 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1481, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1483 = extractelement <4 x float> %1482, i32 0 > %1484 = fsub float %1442, %1483 > %1485 = fcmp olt float %1484, 0.000000e+00 > %.287 = select i1 %1485, float %1372, float %1371 > %1486 = call float @llvm.fabs.f32(float %1484) > %1487 = fsub float %1486, %.287 > %1488 = fcmp une float %.287, 0.000000e+00 > br i1 %1488, label %IF257, label %ELSE258 > >IF257: ; preds = %ENDIF250 > %1489 = fdiv float 1.000000e+00, %.287 > %1490 = fmul float %1487, %1489 > br label %ENDIF256 > >ELSE258: ; preds = %ENDIF250 > %1491 = fcmp ogt float %1487, 0.000000e+00 > %1492 = select i1 %1491, float 1.000000e+00, float %1487 > %1493 = fcmp oge float %1492, 0.000000e+00 > %.op342 = fmul float %1492, 0x4600000000000000 > %1494 = select i1 %1493, float %.op342, float 0xC600000000000000 > br label %ENDIF256 > >ENDIF256: ; preds = %ELSE258, %IF257 > %temp32.1 = phi float [ %1490, %IF257 ], [ %1494, %ELSE258 ] > %1495 = call float @llvm.AMDGPU.clamp.(float %temp32.1, float 0.000000e+00, float 1.000000e+00) > %1496 = fadd float %1495, %1433 > %1497 = call float @llvm.fma.f32(float %969, float %817, float %137) > %1498 = call float @llvm.fma.f32(float %971, float %817, float %138) > %1499 = call float @llvm.fma.f32(float %973, float %817, float %139) > %1500 = call float @llvm.fma.f32(float %969, float %890, float %137) > %1501 = call float @llvm.fma.f32(float %971, float %890, float %138) > %1502 = call float @llvm.fma.f32(float %973, float %890, float %139) > %1503 = call float @llvm.fma.f32(float %200, float %814, float %1500) > %1504 = call float @llvm.fma.f32(float %201, float %814, float %1501) > %1505 = call float @llvm.fma.f32(float %202, float %814, float %1502) > %1506 = 
call float @llvm.fma.f32(float %200, float %811, float %1497) > %1507 = call float @llvm.fma.f32(float %201, float %811, float %1498) > %1508 = call float @llvm.fma.f32(float %202, float %811, float %1499) > %1509 = call float @llvm.fma.f32(float %1506, float %1373, float %237) > %1510 = call float @llvm.fma.f32(float %1507, float %1373, float %238) > %1511 = call float @llvm.fma.f32(float %1508, float %1373, float %226) > %1512 = call float @llvm.fma.f32(float %1503, float %1375, float %237) > %1513 = call float @llvm.fma.f32(float %1504, float %1375, float %238) > %1514 = call float @llvm.fma.f32(float %1505, float %1375, float %226) > %1515 = fmul float %25, %1509 > %1516 = fmul float %26, %1510 > %1517 = fadd float %1515, %1516 > %1518 = fmul float %27, %1511 > %1519 = fadd float %1517, %1518 > %1520 = fadd float %1519, %28 > %1521 = fmul float %29, %1509 > %1522 = fmul float %30, %1510 > %1523 = fadd float %1521, %1522 > %1524 = fmul float %31, %1511 > %1525 = fadd float %1523, %1524 > %1526 = fadd float %1525, %32 > %1527 = fmul float %33, %1509 > %1528 = fmul float %34, %1510 > %1529 = fadd float %1527, %1528 > %1530 = fmul float %35, %1511 > %1531 = fadd float %1529, %1530 > %1532 = fadd float %1531, %36 > %1533 = fcmp oeq float %1532, 0.000000e+00 > %1534 = fcmp oeq float %1532, 0.000000e+00 > %1535 = fcmp ogt float %1520, 0.000000e+00 > %1536 = select i1 %1535, float 1.000000e+00, float %1520 > %1537 = fcmp oge float %1536, 0.000000e+00 > %1538 = fcmp ogt float %1526, 0.000000e+00 > %1539 = select i1 %1538, float 1.000000e+00, float %1526 > %1540 = fcmp oge float %1539, 0.000000e+00 > %.op343 = fmul float %1536, 0x4600000000000000 > %1541 = select i1 %1537, float %.op343, float 0xC600000000000000 > %.op344 = fmul float %1539, 0x4600000000000000 > %1542 = select i1 %1540, float %.op344, float 0xC600000000000000 > %1543 = fdiv float 1.000000e+00, %1532 > %1544 = fmul float %1520, %1543 > %1545 = fmul float %1526, %1543 > %1546 = select i1 %1533, float %1541, float %1544 > %1547 = select i1 %1534, float %1542, float %1545 > %1548 = call float @llvm.fma.f32(float %1546, float 5.000000e-01, float 5.000000e-01) > %1549 = call float @llvm.fma.f32(float %1547, float -5.000000e-01, float 5.000000e-01) > %1550 = bitcast float %1548 to i32 > %1551 = bitcast float %1549 to i32 > %1552 = insertelement <2 x i32> undef, i32 %1550, i32 0 > %1553 = insertelement <2 x i32> %1552, i32 %1551, i32 1 > %1554 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1553, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1555 = extractelement <4 x float> %1554, i32 0 > %1556 = fsub float %1511, %1555 > %1557 = fcmp olt float %1556, 0.000000e+00 > %.288 = select i1 %1557, float %1374, float %1373 > %1558 = call float @llvm.fabs.f32(float %1556) > %1559 = fsub float %1558, %.288 > %1560 = fcmp une float %.288, 0.000000e+00 > br i1 %1560, label %IF263, label %ELSE264 > >IF263: ; preds = %ENDIF256 > %1561 = fdiv float 1.000000e+00, %.288 > %1562 = fmul float %1559, %1561 > br label %ENDIF262 > >ELSE264: ; preds = %ENDIF256 > %1563 = fcmp ogt float %1559, 0.000000e+00 > %1564 = select i1 %1563, float 1.000000e+00, float %1559 > %1565 = fcmp oge float %1564, 0.000000e+00 > %.op345 = fmul float %1564, 0x4600000000000000 > %1566 = select i1 %1565, float %.op345, float 0xC600000000000000 > br label %ENDIF262 > >ENDIF262: ; preds = %ELSE264, %IF263 > %temp20.1 = phi float [ %1562, %IF263 ], [ %1566, %ELSE264 ] > %1567 = call float @llvm.AMDGPU.clamp.(float %temp20.1, float 
0.000000e+00, float 1.000000e+00) > %1568 = fadd float %1567, %1496 > %1569 = fmul float %25, %1512 > %1570 = fmul float %26, %1513 > %1571 = fadd float %1569, %1570 > %1572 = fmul float %27, %1514 > %1573 = fadd float %1571, %1572 > %1574 = fadd float %1573, %28 > %1575 = fmul float %29, %1512 > %1576 = fmul float %30, %1513 > %1577 = fadd float %1575, %1576 > %1578 = fmul float %31, %1514 > %1579 = fadd float %1577, %1578 > %1580 = fadd float %1579, %32 > %1581 = fmul float %33, %1512 > %1582 = fmul float %34, %1513 > %1583 = fadd float %1581, %1582 > %1584 = fmul float %35, %1514 > %1585 = fadd float %1583, %1584 > %1586 = fadd float %1585, %36 > %1587 = fcmp oeq float %1586, 0.000000e+00 > %1588 = fcmp oeq float %1586, 0.000000e+00 > %1589 = fcmp ogt float %1574, 0.000000e+00 > %1590 = select i1 %1589, float 1.000000e+00, float %1574 > %1591 = fcmp oge float %1590, 0.000000e+00 > %1592 = fcmp ogt float %1580, 0.000000e+00 > %1593 = select i1 %1592, float 1.000000e+00, float %1580 > %1594 = fcmp oge float %1593, 0.000000e+00 > %.op346 = fmul float %1590, 0x4600000000000000 > %1595 = select i1 %1591, float %.op346, float 0xC600000000000000 > %.op347 = fmul float %1593, 0x4600000000000000 > %1596 = select i1 %1594, float %.op347, float 0xC600000000000000 > %1597 = fdiv float 1.000000e+00, %1586 > %1598 = fmul float %1574, %1597 > %1599 = fmul float %1580, %1597 > %1600 = select i1 %1587, float %1595, float %1598 > %1601 = select i1 %1588, float %1596, float %1599 > %1602 = call float @llvm.fma.f32(float %1600, float 5.000000e-01, float 5.000000e-01) > %1603 = call float @llvm.fma.f32(float %1601, float -5.000000e-01, float 5.000000e-01) > %1604 = bitcast float %1602 to i32 > %1605 = bitcast float %1603 to i32 > %1606 = insertelement <2 x i32> undef, i32 %1604, i32 0 > %1607 = insertelement <2 x i32> %1606, i32 %1605, i32 1 > %1608 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1607, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1609 = extractelement <4 x float> %1608, i32 0 > %1610 = fsub float %1514, %1609 > %1611 = fcmp olt float %1610, 0.000000e+00 > %.289 = select i1 %1611, float %1376, float %1375 > %1612 = call float @llvm.fabs.f32(float %1610) > %1613 = fsub float %1612, %.289 > %1614 = fcmp une float %.289, 0.000000e+00 > br i1 %1614, label %IF269, label %ELSE270 > >IF269: ; preds = %ENDIF262 > %1615 = fdiv float 1.000000e+00, %.289 > %1616 = fmul float %1613, %1615 > br label %ENDIF268 > >ELSE270: ; preds = %ENDIF262 > %1617 = fcmp ogt float %1613, 0.000000e+00 > %1618 = select i1 %1617, float 1.000000e+00, float %1613 > %1619 = fcmp oge float %1618, 0.000000e+00 > %.op348 = fmul float %1618, 0x4600000000000000 > %1620 = select i1 %1619, float %.op348, float 0xC600000000000000 > br label %ENDIF268 > >ENDIF268: ; preds = %ELSE270, %IF269 > %temp8.1 = phi float [ %1616, %IF269 ], [ %1620, %ELSE270 ] > %1621 = call float @llvm.AMDGPU.clamp.(float %temp8.1, float 0.000000e+00, float 1.000000e+00) > %1622 = fadd float %1621, %1568 > %1623 = fmul float %1622, 0x3FB99999A0000000 > %1624 = fmul float %1623, %1623 > %1625 = call float @llvm.minnum.f32(float %1624, float %958) > %1626 = bitcast float %5 to i32 > %1627 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %1626, 10 > %1628 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, 
float, float, float, float, float, float, float }> %1627, float %1625, 11 > %1629 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1628, float %1624, 12 > %1630 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1629, float 0.000000e+00, 13 > %1631 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1630, float 1.000000e+00, 14 > %1632 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1631, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1632 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..14] >DCL TEMP[0..23], LOCAL >IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.1000} >IMM[1] UINT32 {0, 176, 160, 192} >IMM[2] FLT32 { -0.2700, 0.0100, 0.0040, 0.0000} >IMM[3] FLT32 { 0.5000, 0.2500, 1.3000, 0.6500} >IMM[4] UINT32 {224, 96, 112, 144} >IMM[5] FLT32 {158456325028528675187087900672.0000, 0.5000, -0.5000, 0.1667} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.3100, -0.3100, -0.0100, 0.0000} >IMM[8] FLT32 { 2.3000, 1.1500, 3.2000, 1.6000} >IMM[9] FLT32 { 0.0500, -0.3800, 0.0000, 1.0000} >IMM[10] FLT32 { 4.1000, 2.0500, 5.3000, 2.6500} > 0: MOV TEMP[0].w, IMM[0].xxxx > 1: DP3 TEMP[1].x, IN[1].xyzz, IN[1].xyzz > 2: RSQ TEMP[2].x, TEMP[1].xxxx > 3: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[1].xyzz > 4: MOV TEMP[2].xy, IN[0].xyyy > 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 6: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[0].yyyy, IMM[0].zzzz > 7: DP3 
TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz > 8: RSQ TEMP[4].x, TEMP[4].xxxx > 9: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 10: MUL TEMP[3].xyz, TEMP[2].yyyy, CONST[1][11].xyzz > 11: FMA TEMP[4].xyz, TEMP[2].xxxx, CONST[1][10].xyzz, TEMP[3].xyzz > 12: FMA TEMP[4].xyz, TEMP[2].zzzz, CONST[1][12].xyzz, TEMP[4].xyzz > 13: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 14: RSQ TEMP[5].x, TEMP[5].xxxx > 15: MUL TEMP[2].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 16: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[2].xyzz > 17: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx > 18: FMA TEMP[1].xyz, TEMP[2].xyzz, -TEMP[4].xxxx, TEMP[1].xyzz > 19: FMA TEMP[4].xyz, TEMP[1].xyzz, IMM[0].yyyy, TEMP[2].xyzz > 20: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 21: RSQ TEMP[5].x, TEMP[5].xxxx > 22: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 23: MUL TEMP[3].xyz, TEMP[2].yzxx, TEMP[1].zxyy > 24: FMA TEMP[4].xyz, TEMP[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz > 25: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 26: RSQ TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 28: MUL TEMP[4].xyz, TEMP[1].yzxx, TEMP[3].zxyy > 29: FMA TEMP[4].xyz, TEMP[3].yzxx, TEMP[1].zxyy, -TEMP[4].xyzz > 30: MOV TEMP[5].xy, IN[0].zwww > 31: TEX TEMP[5].xy, TEMP[5], SAMP[1], 2D > 32: FMA TEMP[6].xy, TEMP[5].xyyy, IMM[0].yyyy, IMM[0].zzzz > 33: DP2 TEMP[7].x, TEMP[6].xyyy, TEMP[6].xyyy > 34: RSQ TEMP[7].x, TEMP[7].xxxx > 35: MUL TEMP[5].xy, TEMP[7].xxxx, TEMP[6].xyyy > 36: MOV TEMP[5].zw, -TEMP[5].xxxx > 37: DP2 TEMP[6].x, TEMP[5].zyyy, IMM[0].wwww > 38: DP2 TEMP[7].x, TEMP[5].wyyy, IMM[2].xyyy > 39: FMA TEMP[7].xyz, TEMP[4].xyzz, TEMP[7].xxxx, TEMP[1].xyzz > 40: FMA TEMP[6].xyz, TEMP[4].xyzz, TEMP[6].xxxx, TEMP[1].xyzz > 41: DP2 TEMP[8].x, TEMP[5].yxxx, IMM[0].wwww > 42: FMA TEMP[6].xyz, TEMP[3].xyzz, TEMP[8].xxxx, TEMP[6].xyzz > 43: MOV TEMP[9].xy, IN[0].xyyy > 44: TEX TEMP[9].x, TEMP[9], SAMP[2], 2D > 45: MOV TEMP[10].z, TEMP[9].xxxx > 46: MUL TEMP[9].xy, TEMP[9].xxxx, IMM[2].zyyy > 47: MAX TEMP[9].xy, TEMP[9].xyyy, IMM[0].xxxx > 48: MUL TEMP[11], TEMP[9].xxxx, IMM[3] > 49: MOV TEMP[10].w, IMM[0].xxxx > 50: DP2 TEMP[12].x, TEMP[10].zwww, CONST[1][14].xyyy > 51: MUL TEMP[10].xy, TEMP[12].xxxx, IN[2].xyyy > 52: FMA TEMP[12].xyz, TEMP[6].xyzz, TEMP[11].xxxx, TEMP[10].xyzz > 53: MOV TEMP[0].xyz, TEMP[12].xyzx > 54: DP4 TEMP[6].x, CONST[1][6], TEMP[0] > 55: DP4 TEMP[13].x, CONST[1][7], TEMP[0] > 56: MOV TEMP[6].y, TEMP[13].xxxx > 57: DP4 TEMP[13].x, CONST[1][9], TEMP[0] > 58: FSEQ TEMP[14].xy, TEMP[13].xxxx, IMM[2].wwww > 59: SSG TEMP[15].xy, TEMP[6].xyyy > 60: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy > 61: RCP TEMP[13].xy, TEMP[13].xxxx > 62: MUL TEMP[13].xy, TEMP[6].xyyy, TEMP[13].xyyy > 63: UCMP TEMP[0].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[13].xyyy > 64: FMA TEMP[0].xy, TEMP[0].xyyy, IMM[5].yzzz, IMM[3].xxxx > 65: MOV TEMP[13].xy, TEMP[0].xyyy > 66: TEX TEMP[13].x, TEMP[13], SAMP[3], 2D > 67: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].zzzz > 68: FSLT TEMP[12].x, TEMP[0].xxxx, IMM[2].wwww > 69: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx > 70: INEG TEMP[12].x, TEMP[12].xxxx > 71: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx > 72: UIF TEMP[12].xxxx :0 > 73: MOV TEMP[12].x, TEMP[11].yyyy > 74: ELSE :0 > 75: MOV TEMP[12].x, TEMP[11].xxxx > 76: ENDIF > 77: MUL TEMP[13].x, TEMP[12].xxxx, IMM[3].xxxx > 78: FSLT TEMP[14].x, IMM[2].wwww, TEMP[1].zzzz > 79: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx > 80: INEG TEMP[14].x, TEMP[14].xxxx > 81: FSLT TEMP[15].x, IMM[2].wwww, TEMP[0].xxxx > 82: AND TEMP[15].x, TEMP[15].xxxx, IMM[6].xxxx > 83: INEG TEMP[15].x, 
TEMP[15].xxxx > 84: AND TEMP[15].x, TEMP[14].xxxx, TEMP[15].xxxx > 85: USNE TEMP[15].x, TEMP[15].xxxx, IMM[1].xxxx > 86: UIF TEMP[15].xxxx :0 > 87: MOV TEMP[13].x, TEMP[13].xxxx > 88: ELSE :0 > 89: MOV TEMP[13].x, TEMP[12].xxxx > 90: ENDIF > 91: ABS TEMP[12].x, TEMP[0].xxxx > 92: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].xxxx > 93: FSNE TEMP[12].x, TEMP[13].xxxx, IMM[2].wwww > 94: UIF TEMP[12].xxxx :0 > 95: RCP TEMP[12].x, TEMP[13].xxxx > 96: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 97: ELSE :0 > 98: SSG TEMP[13].x, TEMP[0].xxxx > 99: MUL TEMP[12].x, IMM[5].xxxx, TEMP[13].xxxx >100: ENDIF >101: MOV_SAT TEMP[12].x, TEMP[12].xxxx >102: MUL TEMP[6].xyz, TEMP[5].yxyy, IMM[7].xyxx >103: FMA TEMP[13].xyz, TEMP[4].xyzz, TEMP[6].yyyy, TEMP[1].xyzz >104: FMA TEMP[13].xyz, TEMP[3].xyzz, TEMP[6].xxxx, TEMP[13].xyzz >105: FMA TEMP[15].xyz, TEMP[13].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >106: MOV TEMP[13].xyz, TEMP[15].xyzx >107: MOV TEMP[13].w, IMM[0].xxxx >108: DP4 TEMP[11].x, CONST[1][6], TEMP[13] >109: DP4 TEMP[16].x, CONST[1][7], TEMP[13] >110: MOV TEMP[11].y, TEMP[16].xxxx >111: DP4 TEMP[16].x, CONST[1][9], TEMP[13] >112: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >113: SSG TEMP[18].xy, TEMP[11].xyyy >114: MUL TEMP[18].xy, IMM[5].xxxx, TEMP[18].xyyy >115: RCP TEMP[16].xy, TEMP[16].xxxx >116: MUL TEMP[16].xy, TEMP[11].xyyy, TEMP[16].xyyy >117: UCMP TEMP[16].xy, TEMP[17].xyyy, TEMP[18].xyyy, TEMP[16].xyyy >118: FMA TEMP[16].xy, TEMP[16].xyyy, IMM[5].yzzz, IMM[3].xxxx >119: MOV TEMP[16].xy, TEMP[16].xyyy >120: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >121: ADD TEMP[15].x, -TEMP[16].xxxx, TEMP[15].zzzz >122: FSLT TEMP[16].x, TEMP[15].xxxx, IMM[2].wwww >123: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >124: INEG TEMP[16].x, TEMP[16].xxxx >125: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >126: UIF TEMP[16].xxxx :0 >127: MOV TEMP[16].x, TEMP[11].wwww >128: ELSE :0 >129: MOV TEMP[16].x, TEMP[11].zzzz >130: ENDIF >131: MUL TEMP[17].x, TEMP[16].xxxx, IMM[3].xxxx >132: FSLT TEMP[18].x, IMM[2].wwww, TEMP[15].xxxx >133: AND TEMP[18].x, TEMP[18].xxxx, IMM[6].xxxx >134: INEG TEMP[18].x, TEMP[18].xxxx >135: AND TEMP[18].x, TEMP[14].xxxx, TEMP[18].xxxx >136: USNE TEMP[18].x, TEMP[18].xxxx, IMM[1].xxxx >137: UIF TEMP[18].xxxx :0 >138: MOV TEMP[17].x, TEMP[17].xxxx >139: ELSE :0 >140: MOV TEMP[17].x, TEMP[16].xxxx >141: ENDIF >142: ABS TEMP[15].x, TEMP[15].xxxx >143: ADD TEMP[15].x, -TEMP[17].xxxx, TEMP[15].xxxx >144: FSNE TEMP[16].x, TEMP[17].xxxx, IMM[2].wwww >145: UIF TEMP[16].xxxx :0 >146: RCP TEMP[16].x, TEMP[17].xxxx >147: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[16].xxxx >148: ELSE :0 >149: SSG TEMP[15].x, TEMP[15].xxxx >150: MUL TEMP[16].x, IMM[5].xxxx, TEMP[15].xxxx >151: ENDIF >152: MOV_SAT TEMP[15].x, TEMP[16].xxxx >153: ADD TEMP[0].x, TEMP[15].xxxx, TEMP[12].xxxx >154: DP2 TEMP[12].x, TEMP[5].yxxx, IMM[2].xyyy >155: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[7].xyzz >156: MUL TEMP[11], TEMP[9].xxxx, IMM[8] >157: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >158: MOV TEMP[7].xyz, TEMP[15].xyzx >159: MOV TEMP[7].w, IMM[0].xxxx >160: DP4 TEMP[13].x, CONST[1][6], TEMP[7] >161: DP4 TEMP[16].x, CONST[1][7], TEMP[7] >162: MOV TEMP[13].y, TEMP[16].xxxx >163: DP4 TEMP[16].x, CONST[1][9], TEMP[7] >164: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >165: SSG TEMP[18].xy, TEMP[13].xyyy >166: MUL TEMP[18].xy, IMM[5].xxxx, TEMP[18].xyyy >167: RCP TEMP[16].xy, TEMP[16].xxxx >168: MUL TEMP[16].xy, TEMP[13].xyyy, TEMP[16].xyyy >169: UCMP TEMP[7].xy, TEMP[17].xyyy, TEMP[18].xyyy, TEMP[16].xyyy >170: FMA 
TEMP[7].xy, TEMP[7].xyyy, IMM[5].yzzz, IMM[3].xxxx >171: MOV TEMP[16].xy, TEMP[7].xyyy >172: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >173: ADD TEMP[15].x, -TEMP[16].xxxx, TEMP[15].zzzz >174: FSLT TEMP[16].x, TEMP[15].xxxx, IMM[2].wwww >175: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >176: INEG TEMP[16].x, TEMP[16].xxxx >177: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >178: UIF TEMP[16].xxxx :0 >179: MOV TEMP[16].x, TEMP[11].yyyy >180: ELSE :0 >181: MOV TEMP[16].x, TEMP[11].xxxx >182: ENDIF >183: MUL TEMP[17].x, TEMP[16].xxxx, IMM[3].xxxx >184: FSLT TEMP[18].x, IMM[2].wwww, TEMP[15].xxxx >185: AND TEMP[18].x, TEMP[18].xxxx, IMM[6].xxxx >186: INEG TEMP[18].x, TEMP[18].xxxx >187: AND TEMP[18].x, TEMP[14].xxxx, TEMP[18].xxxx >188: USNE TEMP[18].x, TEMP[18].xxxx, IMM[1].xxxx >189: UIF TEMP[18].xxxx :0 >190: MOV TEMP[17].x, TEMP[17].xxxx >191: ELSE :0 >192: MOV TEMP[17].x, TEMP[16].xxxx >193: ENDIF >194: ABS TEMP[15].x, TEMP[15].xxxx >195: ADD TEMP[15].x, TEMP[15].xxxx, -TEMP[17].xxxx >196: FSNE TEMP[16].x, TEMP[17].xxxx, IMM[2].wwww >197: UIF TEMP[16].xxxx :0 >198: RCP TEMP[16].x, TEMP[17].xxxx >199: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[16].xxxx >200: ELSE :0 >201: SSG TEMP[15].x, TEMP[15].xxxx >202: MUL TEMP[16].x, IMM[5].xxxx, TEMP[15].xxxx >203: ENDIF >204: MOV_SAT TEMP[15].x, TEMP[16].xxxx >205: ADD TEMP[0].x, TEMP[15].xxxx, TEMP[0].xxxx >206: DP2 TEMP[15].x, TEMP[5].yxxx, IMM[7].zyyy >207: MUL TEMP[7].xy, TEMP[5].xyyy, IMM[0].zxxx >208: DP2 TEMP[16].x, TEMP[7].xyyy, IMM[7].zyyy >209: FMA TEMP[13].xyz, TEMP[4].xyzz, TEMP[16].xxxx, TEMP[1].xyzz >210: FMA TEMP[13].xyz, TEMP[3].xyzz, TEMP[15].xxxx, TEMP[13].xyzz >211: FMA TEMP[17].xyz, TEMP[13].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >212: MOV TEMP[13].xyz, TEMP[17].xyzx >213: MOV TEMP[13].w, IMM[0].xxxx >214: DP4 TEMP[11].x, CONST[1][6], TEMP[13] >215: DP4 TEMP[18].x, CONST[1][7], TEMP[13] >216: MOV TEMP[11].y, TEMP[18].xxxx >217: DP4 TEMP[18].x, CONST[1][9], TEMP[13] >218: FSEQ TEMP[19].xy, TEMP[18].xxxx, IMM[2].wwww >219: SSG TEMP[20].xy, TEMP[11].xyyy >220: MUL TEMP[20].xy, IMM[5].xxxx, TEMP[20].xyyy >221: RCP TEMP[18].xy, TEMP[18].xxxx >222: MUL TEMP[18].xy, TEMP[11].xyyy, TEMP[18].xyyy >223: UCMP TEMP[18].xy, TEMP[19].xyyy, TEMP[20].xyyy, TEMP[18].xyyy >224: FMA TEMP[18].xy, TEMP[18].xyyy, IMM[5].yzzz, IMM[3].xxxx >225: MOV TEMP[18].xy, TEMP[18].xyyy >226: TEX TEMP[18].x, TEMP[18], SAMP[3], 2D >227: ADD TEMP[17].x, -TEMP[18].xxxx, TEMP[17].zzzz >228: FSLT TEMP[18].x, TEMP[17].xxxx, IMM[2].wwww >229: AND TEMP[18].x, TEMP[18].xxxx, IMM[6].xxxx >230: INEG TEMP[18].x, TEMP[18].xxxx >231: USNE TEMP[18].x, TEMP[18].xxxx, IMM[1].xxxx >232: UIF TEMP[18].xxxx :0 >233: MOV TEMP[18].x, TEMP[11].wwww >234: ELSE :0 >235: MOV TEMP[18].x, TEMP[11].zzzz >236: ENDIF >237: MUL TEMP[19].x, TEMP[18].xxxx, IMM[3].xxxx >238: FSLT TEMP[20].x, IMM[2].wwww, TEMP[17].xxxx >239: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >240: INEG TEMP[20].x, TEMP[20].xxxx >241: AND TEMP[20].x, TEMP[14].xxxx, TEMP[20].xxxx >242: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >243: UIF TEMP[20].xxxx :0 >244: MOV TEMP[19].x, TEMP[19].xxxx >245: ELSE :0 >246: MOV TEMP[19].x, TEMP[18].xxxx >247: ENDIF >248: ABS TEMP[17].x, TEMP[17].xxxx >249: ADD TEMP[17].x, TEMP[17].xxxx, -TEMP[19].xxxx >250: FSNE TEMP[18].x, TEMP[19].xxxx, IMM[2].wwww >251: UIF TEMP[18].xxxx :0 >252: RCP TEMP[18].x, TEMP[19].xxxx >253: MUL TEMP[18].x, TEMP[17].xxxx, TEMP[18].xxxx >254: ELSE :0 >255: SSG TEMP[17].x, TEMP[17].xxxx >256: MUL TEMP[18].x, IMM[5].xxxx, TEMP[17].xxxx >257: ENDIF >258: MOV_SAT TEMP[17].x, 
TEMP[18].xxxx >259: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[17].xxxx >260: DP2 TEMP[17].x, TEMP[5].yxxx, IMM[9].xyyy >261: DP2 TEMP[18].x, TEMP[5].yxxx, IMM[3].xxxx >262: DP2 TEMP[5].x, TEMP[7].xyyy, IMM[9].xyyy >263: FMA TEMP[11].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[1].xyzz >264: FMA TEMP[11].xyz, TEMP[3].xyzz, TEMP[17].xxxx, TEMP[11].xyzz >265: MUL TEMP[13], TEMP[9].xxxx, IMM[10] >266: FMA TEMP[19].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[10].xyzz >267: MOV TEMP[11].xyz, TEMP[19].xyzx >268: MOV TEMP[11].w, IMM[0].xxxx >269: DP4 TEMP[20].x, CONST[1][6], TEMP[11] >270: DP4 TEMP[21].x, CONST[1][7], TEMP[11] >271: MOV TEMP[20].y, TEMP[21].xxxx >272: DP4 TEMP[21].x, CONST[1][9], TEMP[11] >273: FSEQ TEMP[22].xy, TEMP[21].xxxx, IMM[2].wwww >274: SSG TEMP[23].xy, TEMP[20].xyyy >275: MUL TEMP[23].xy, IMM[5].xxxx, TEMP[23].xyyy >276: RCP TEMP[21].xy, TEMP[21].xxxx >277: MUL TEMP[20].xy, TEMP[20].xyyy, TEMP[21].xyyy >278: UCMP TEMP[20].xy, TEMP[22].xyyy, TEMP[23].xyyy, TEMP[20].xyyy >279: FMA TEMP[20].xy, TEMP[20].xyyy, IMM[5].yzzz, IMM[3].xxxx >280: MOV TEMP[20].xy, TEMP[20].xyyy >281: TEX TEMP[20].x, TEMP[20], SAMP[3], 2D >282: ADD TEMP[19].x, -TEMP[20].xxxx, TEMP[19].zzzz >283: FSLT TEMP[20].x, IMM[2].wwww, TEMP[19].xxxx >284: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >285: INEG TEMP[20].x, TEMP[20].xxxx >286: AND TEMP[20].x, TEMP[14].xxxx, TEMP[20].xxxx >287: FSLT TEMP[21].x, TEMP[19].xxxx, IMM[2].wwww >288: AND TEMP[21].x, TEMP[21].xxxx, IMM[6].xxxx >289: INEG TEMP[21].x, TEMP[21].xxxx >290: USNE TEMP[21].x, TEMP[21].xxxx, IMM[1].xxxx >291: UIF TEMP[21].xxxx :0 >292: MOV TEMP[21].x, TEMP[13].yyyy >293: ELSE :0 >294: MOV TEMP[21].x, TEMP[13].xxxx >295: ENDIF >296: MUL TEMP[22].x, TEMP[21].xxxx, IMM[3].xxxx >297: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >298: UIF TEMP[20].xxxx :0 >299: MOV TEMP[20].x, TEMP[22].xxxx >300: ELSE :0 >301: MOV TEMP[20].x, TEMP[21].xxxx >302: ENDIF >303: ABS TEMP[19].x, TEMP[19].xxxx >304: ADD TEMP[19].x, -TEMP[20].xxxx, TEMP[19].xxxx >305: FSNE TEMP[21].x, TEMP[20].xxxx, IMM[2].wwww >306: UIF TEMP[21].xxxx :0 >307: RCP TEMP[20].x, TEMP[20].xxxx >308: MUL TEMP[20].x, TEMP[19].xxxx, TEMP[20].xxxx >309: ELSE :0 >310: SSG TEMP[19].x, TEMP[19].xxxx >311: MUL TEMP[20].x, IMM[5].xxxx, TEMP[19].xxxx >312: ENDIF >313: MOV_SAT TEMP[19].x, TEMP[20].xxxx >314: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[19].xxxx >315: DP2 TEMP[19].x, TEMP[7].xyyy, IMM[3].xxxx >316: FMA TEMP[1].xyz, TEMP[4].xyzz, TEMP[19].xxxx, TEMP[1].xyzz >317: FMA TEMP[1].xyz, TEMP[3].xyzz, TEMP[18].xxxx, TEMP[1].xyzz >318: FMA TEMP[20].xyz, TEMP[1].xyzz, TEMP[13].zzzz, TEMP[10].xyzz >319: MOV TEMP[11].xyz, TEMP[20].xyzx >320: MOV TEMP[11].w, IMM[0].xxxx >321: DP4 TEMP[1].x, CONST[1][6], TEMP[11] >322: DP4 TEMP[21].x, CONST[1][7], TEMP[11] >323: MOV TEMP[1].y, TEMP[21].xxxx >324: DP4 TEMP[21].x, CONST[1][9], TEMP[11] >325: FSEQ TEMP[22].xy, TEMP[21].xxxx, IMM[2].wwww >326: SSG TEMP[23].xy, TEMP[1].xyyy >327: MUL TEMP[23].xy, IMM[5].xxxx, TEMP[23].xyyy >328: RCP TEMP[21].xy, TEMP[21].xxxx >329: MUL TEMP[21].xy, TEMP[1].xyyy, TEMP[21].xyyy >330: UCMP TEMP[1].xy, TEMP[22].xyyy, TEMP[23].xyyy, TEMP[21].xyyy >331: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >332: MOV TEMP[21].xy, TEMP[1].xyyy >333: TEX TEMP[21].x, TEMP[21], SAMP[3], 2D >334: ADD TEMP[1].x, -TEMP[21].xxxx, TEMP[20].zzzz >335: FSLT TEMP[20].x, IMM[2].wwww, TEMP[1].xxxx >336: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >337: INEG TEMP[20].x, TEMP[20].xxxx >338: AND TEMP[14].x, TEMP[14].xxxx, TEMP[20].xxxx >339: FSLT TEMP[20].x, TEMP[1].xxxx, IMM[2].wwww 
>340: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >341: INEG TEMP[20].x, TEMP[20].xxxx >342: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >343: UIF TEMP[20].xxxx :0 >344: MOV TEMP[20].x, TEMP[13].wwww >345: ELSE :0 >346: MOV TEMP[20].x, TEMP[13].zzzz >347: ENDIF >348: MUL TEMP[13].x, TEMP[20].xxxx, IMM[3].xxxx >349: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >350: UIF TEMP[14].xxxx :0 >351: MOV TEMP[13].x, TEMP[13].xxxx >352: ELSE :0 >353: MOV TEMP[13].x, TEMP[20].xxxx >354: ENDIF >355: ABS TEMP[14].x, TEMP[1].xxxx >356: ADD TEMP[1].x, -TEMP[13].xxxx, TEMP[14].xxxx >357: FSNE TEMP[14].x, TEMP[13].xxxx, IMM[2].wwww >358: UIF TEMP[14].xxxx :0 >359: RCP TEMP[13].x, TEMP[13].xxxx >360: MUL TEMP[13].x, TEMP[1].xxxx, TEMP[13].xxxx >361: ELSE :0 >362: SSG TEMP[14].x, TEMP[1].xxxx >363: MUL TEMP[13].x, IMM[5].xxxx, TEMP[14].xxxx >364: ENDIF >365: MOV_SAT TEMP[13].x, TEMP[13].xxxx >366: ADD TEMP[0].x, TEMP[13].xxxx, TEMP[0].xxxx >367: MUL TEMP[0].x, TEMP[0].xxxx, IMM[5].wwww >368: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx >369: MUL TEMP[1].xyz, TEMP[2].zxyy, TEMP[3].yzxx >370: FMA TEMP[1].xyz, TEMP[2].yzxx, TEMP[3].zxyy, -TEMP[1].xyzz >371: FMA TEMP[4].xyz, TEMP[1].xyzz, TEMP[6].yyyy, TEMP[2].xyzz >372: FMA TEMP[4].xyz, TEMP[3].xyzz, TEMP[6].zzzz, TEMP[4].xyzz >373: MUL TEMP[6], TEMP[9].yyyy, IMM[3] >374: FMA TEMP[13].xyz, TEMP[4].xyzz, TEMP[6].zzzz, TEMP[10].xyzz >375: MOV TEMP[11].xyz, TEMP[13].xyzx >376: MOV TEMP[11].w, IMM[0].xxxx >377: DP4 TEMP[4].x, CONST[1][6], TEMP[11] >378: DP4 TEMP[14].x, CONST[1][7], TEMP[11] >379: MOV TEMP[4].y, TEMP[14].xxxx >380: DP4 TEMP[14].x, CONST[1][9], TEMP[11] >381: FSEQ TEMP[20].xy, TEMP[14].xxxx, IMM[2].wwww >382: SSG TEMP[21].xy, TEMP[4].xyyy >383: MUL TEMP[21].xy, IMM[5].xxxx, TEMP[21].xyyy >384: RCP TEMP[14].xy, TEMP[14].xxxx >385: MUL TEMP[14].xy, TEMP[4].xyyy, TEMP[14].xyyy >386: UCMP TEMP[4].xy, TEMP[20].xyyy, TEMP[21].xyyy, TEMP[14].xyyy >387: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >388: MOV TEMP[14].xy, TEMP[4].xyyy >389: TEX TEMP[14].x, TEMP[14], SAMP[3], 2D >390: ADD TEMP[13].x, -TEMP[14].xxxx, TEMP[13].zzzz >391: FSLT TEMP[14].x, TEMP[13].xxxx, IMM[2].wwww >392: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx >393: INEG TEMP[14].x, TEMP[14].xxxx >394: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >395: UIF TEMP[14].xxxx :0 >396: MOV TEMP[14].x, TEMP[6].wwww >397: ELSE :0 >398: MOV TEMP[14].x, TEMP[6].zzzz >399: ENDIF >400: ABS TEMP[13].x, TEMP[13].xxxx >401: ADD TEMP[13].x, TEMP[13].xxxx, -TEMP[14].xxxx >402: FSNE TEMP[20].x, TEMP[14].xxxx, IMM[2].wwww >403: UIF TEMP[20].xxxx :0 >404: RCP TEMP[14].x, TEMP[14].xxxx >405: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[14].xxxx >406: ELSE :0 >407: SSG TEMP[13].x, TEMP[13].xxxx >408: MUL TEMP[14].x, IMM[5].xxxx, TEMP[13].xxxx >409: ENDIF >410: MOV_SAT TEMP[13].x, TEMP[14].xxxx >411: DP2 TEMP[4].x, TEMP[7].xyyy, IMM[0].wwww >412: DP2 TEMP[14].x, TEMP[7].xyyy, IMM[2].xyyy >413: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[14].xxxx, TEMP[2].xyzz >414: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[7].xyzz >415: FMA TEMP[4].xyz, TEMP[1].xyzz, TEMP[4].xxxx, TEMP[2].xyzz >416: FMA TEMP[4].xyz, TEMP[3].xyzz, TEMP[8].xxxx, TEMP[4].xyzz >417: FMA TEMP[8].xyz, TEMP[4].xyzz, TEMP[6].xxxx, TEMP[10].xyzz >418: MOV TEMP[11].xyz, TEMP[8].xyzx >419: MOV TEMP[11].w, IMM[0].xxxx >420: DP4 TEMP[4].x, CONST[1][6], TEMP[11] >421: DP4 TEMP[12].x, CONST[1][7], TEMP[11] >422: MOV TEMP[4].y, TEMP[12].xxxx >423: DP4 TEMP[12].x, CONST[1][9], TEMP[11] >424: FSEQ TEMP[14].xy, TEMP[12].xxxx, IMM[2].wwww >425: SSG TEMP[20].xy, TEMP[4].xyyy 
>426: MUL TEMP[20].xy, IMM[5].xxxx, TEMP[20].xyyy >427: RCP TEMP[12].xy, TEMP[12].xxxx >428: MUL TEMP[12].xy, TEMP[4].xyyy, TEMP[12].xyyy >429: UCMP TEMP[4].xy, TEMP[14].xyyy, TEMP[20].xyyy, TEMP[12].xyyy >430: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >431: MOV TEMP[12].xy, TEMP[4].xyyy >432: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >433: ADD TEMP[8].x, -TEMP[12].xxxx, TEMP[8].zzzz >434: FSLT TEMP[12].x, TEMP[8].xxxx, IMM[2].wwww >435: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >436: INEG TEMP[12].x, TEMP[12].xxxx >437: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >438: UIF TEMP[12].xxxx :0 >439: MOV TEMP[12].x, TEMP[6].yyyy >440: ELSE :0 >441: MOV TEMP[12].x, TEMP[6].xxxx >442: ENDIF >443: ABS TEMP[8].x, TEMP[8].xxxx >444: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[12].xxxx >445: FSNE TEMP[14].x, TEMP[12].xxxx, IMM[2].wwww >446: UIF TEMP[14].xxxx :0 >447: RCP TEMP[12].x, TEMP[12].xxxx >448: MUL TEMP[12].x, TEMP[8].xxxx, TEMP[12].xxxx >449: ELSE :0 >450: SSG TEMP[8].x, TEMP[8].xxxx >451: MUL TEMP[12].x, IMM[5].xxxx, TEMP[8].xxxx >452: ENDIF >453: MOV_SAT TEMP[8].x, TEMP[12].xxxx >454: ADD TEMP[8].x, TEMP[13].xxxx, TEMP[8].xxxx >455: MUL TEMP[6], TEMP[9].yyyy, IMM[8] >456: MUL TEMP[11], TEMP[9].yyyy, IMM[10] >457: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[6].xxxx, TEMP[10].xyzz >458: MOV TEMP[7].xyz, TEMP[9].xyzx >459: MOV TEMP[7].w, IMM[0].xxxx >460: DP4 TEMP[4].x, CONST[1][6], TEMP[7] >461: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >462: MOV TEMP[4].y, TEMP[12].xxxx >463: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >464: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >465: SSG TEMP[14].xy, TEMP[4].xyyy >466: MUL TEMP[14].xy, IMM[5].xxxx, TEMP[14].xyyy >467: RCP TEMP[12].xy, TEMP[12].xxxx >468: MUL TEMP[12].xy, TEMP[4].xyyy, TEMP[12].xyyy >469: UCMP TEMP[4].xy, TEMP[13].xyyy, TEMP[14].xyyy, TEMP[12].xyyy >470: FMA TEMP[4].xy, TEMP[4].xyyy, IMM[5].yzzz, IMM[3].xxxx >471: MOV TEMP[12].xy, TEMP[4].xyyy >472: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >473: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >474: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >475: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >476: INEG TEMP[12].x, TEMP[12].xxxx >477: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >478: UIF TEMP[12].xxxx :0 >479: MOV TEMP[12].x, TEMP[6].yyyy >480: ELSE :0 >481: MOV TEMP[12].x, TEMP[6].xxxx >482: ENDIF >483: ABS TEMP[9].x, TEMP[9].xxxx >484: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[12].xxxx >485: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >486: UIF TEMP[13].xxxx :0 >487: RCP TEMP[12].x, TEMP[12].xxxx >488: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >489: ELSE :0 >490: SSG TEMP[9].x, TEMP[9].xxxx >491: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >492: ENDIF >493: MOV_SAT TEMP[9].x, TEMP[12].xxxx >494: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >495: FMA TEMP[4].xyz, TEMP[1].xyzz, TEMP[16].xxxx, TEMP[2].xyzz >496: FMA TEMP[4].xyz, TEMP[3].xyzz, TEMP[15].xxxx, TEMP[4].xyzz >497: FMA TEMP[9].xyz, TEMP[4].xyzz, TEMP[6].zzzz, TEMP[10].xyzz >498: MOV TEMP[7].xyz, TEMP[9].xyzx >499: MOV TEMP[7].w, IMM[0].xxxx >500: DP4 TEMP[4].x, CONST[1][6], TEMP[7] >501: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >502: MOV TEMP[4].y, TEMP[12].xxxx >503: DP4 TEMP[7].x, CONST[1][9], TEMP[7] >504: FSEQ TEMP[12].xy, TEMP[7].xxxx, IMM[2].wwww >505: SSG TEMP[13].xy, TEMP[4].xyyy >506: MUL TEMP[13].xy, IMM[5].xxxx, TEMP[13].xyyy >507: RCP TEMP[7].xy, TEMP[7].xxxx >508: MUL TEMP[7].xy, TEMP[4].xyyy, TEMP[7].xyyy >509: UCMP TEMP[7].xy, TEMP[12].xyyy, TEMP[13].xyyy, TEMP[7].xyyy >510: FMA TEMP[7].xy, TEMP[7].xyyy, IMM[5].yzzz, IMM[3].xxxx >511: MOV TEMP[7].xy, 
TEMP[7].xyyy >512: TEX TEMP[7].x, TEMP[7], SAMP[3], 2D >513: ADD TEMP[7].x, -TEMP[7].xxxx, TEMP[9].zzzz >514: FSLT TEMP[9].x, TEMP[7].xxxx, IMM[2].wwww >515: AND TEMP[9].x, TEMP[9].xxxx, IMM[6].xxxx >516: INEG TEMP[9].x, TEMP[9].xxxx >517: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx >518: UIF TEMP[9].xxxx :0 >519: MOV TEMP[9].x, TEMP[6].wwww >520: ELSE :0 >521: MOV TEMP[9].x, TEMP[6].zzzz >522: ENDIF >523: ABS TEMP[6].x, TEMP[7].xxxx >524: ADD TEMP[6].x, -TEMP[9].xxxx, TEMP[6].xxxx >525: FSNE TEMP[7].x, TEMP[9].xxxx, IMM[2].wwww >526: UIF TEMP[7].xxxx :0 >527: RCP TEMP[7].x, TEMP[9].xxxx >528: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx >529: ELSE :0 >530: SSG TEMP[6].x, TEMP[6].xxxx >531: MUL TEMP[7].x, IMM[5].xxxx, TEMP[6].xxxx >532: ENDIF >533: MOV_SAT TEMP[6].x, TEMP[7].xxxx >534: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx >535: FMA TEMP[4].xyz, TEMP[1].xyzz, TEMP[5].xxxx, TEMP[2].xyzz >536: FMA TEMP[1].xyz, TEMP[1].xyzz, TEMP[19].xxxx, TEMP[2].xyzz >537: FMA TEMP[1].xyz, TEMP[3].xyzz, TEMP[18].xxxx, TEMP[1].xyzz >538: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[17].xxxx, TEMP[4].xyzz >539: FMA TEMP[4].xyz, TEMP[2].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >540: MOV TEMP[2].xyz, TEMP[4].xyzx >541: FMA TEMP[5].xyz, TEMP[1].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >542: MOV TEMP[1].xyz, TEMP[5].xyzx >543: MOV TEMP[2].w, IMM[0].xxxx >544: DP4 TEMP[3].x, CONST[1][6], TEMP[2] >545: DP4 TEMP[7].x, CONST[1][7], TEMP[2] >546: MOV TEMP[3].y, TEMP[7].xxxx >547: DP4 TEMP[7].x, CONST[1][9], TEMP[2] >548: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[2].wwww >549: SSG TEMP[9].xy, TEMP[3].xyyy >550: MUL TEMP[9].xy, IMM[5].xxxx, TEMP[9].xyyy >551: RCP TEMP[7].xy, TEMP[7].xxxx >552: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >553: UCMP TEMP[3].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[3].xyyy >554: FMA TEMP[3].xy, TEMP[3].xyyy, IMM[5].yzzz, IMM[3].xxxx >555: MOV TEMP[3].xy, TEMP[3].xyyy >556: TEX TEMP[3].x, TEMP[3], SAMP[3], 2D >557: ADD TEMP[3].x, -TEMP[3].xxxx, TEMP[4].zzzz >558: FSLT TEMP[4].x, TEMP[3].xxxx, IMM[2].wwww >559: AND TEMP[4].x, TEMP[4].xxxx, IMM[6].xxxx >560: INEG TEMP[4].x, TEMP[4].xxxx >561: USNE TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx >562: UIF TEMP[4].xxxx :0 >563: MOV TEMP[4].x, TEMP[11].yyyy >564: ELSE :0 >565: MOV TEMP[4].x, TEMP[11].xxxx >566: ENDIF >567: ABS TEMP[3].x, TEMP[3].xxxx >568: ADD TEMP[3].x, -TEMP[4].xxxx, TEMP[3].xxxx >569: FSNE TEMP[7].x, TEMP[4].xxxx, IMM[2].wwww >570: UIF TEMP[7].xxxx :0 >571: RCP TEMP[4].x, TEMP[4].xxxx >572: MUL TEMP[4].x, TEMP[3].xxxx, TEMP[4].xxxx >573: ELSE :0 >574: SSG TEMP[3].x, TEMP[3].xxxx >575: MUL TEMP[4].x, IMM[5].xxxx, TEMP[3].xxxx >576: ENDIF >577: MOV_SAT TEMP[3].x, TEMP[4].xxxx >578: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[6].xxxx >579: MOV TEMP[1].w, IMM[0].xxxx >580: DP4 TEMP[2].x, CONST[1][6], TEMP[1] >581: DP4 TEMP[4].x, CONST[1][7], TEMP[1] >582: MOV TEMP[2].y, TEMP[4].xxxx >583: DP4 TEMP[1].x, CONST[1][9], TEMP[1] >584: FSEQ TEMP[4].xy, TEMP[1].xxxx, IMM[2].wwww >585: SSG TEMP[6].xy, TEMP[2].xyyy >586: MUL TEMP[6].xy, IMM[5].xxxx, TEMP[6].xyyy >587: RCP TEMP[1].xy, TEMP[1].xxxx >588: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy >589: UCMP TEMP[1].xy, TEMP[4].xyyy, TEMP[6].xyyy, TEMP[1].xyyy >590: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >591: MOV TEMP[1].xy, TEMP[1].xyyy >592: TEX TEMP[1].x, TEMP[1], SAMP[3], 2D >593: ADD TEMP[1].x, -TEMP[1].xxxx, TEMP[5].zzzz >594: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[2].wwww >595: AND TEMP[2].x, TEMP[2].xxxx, IMM[6].xxxx >596: INEG TEMP[2].x, TEMP[2].xxxx >597: USNE TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx >598: UIF TEMP[2].xxxx :0 
>599: MOV TEMP[2].x, TEMP[11].wwww >600: ELSE :0 >601: MOV TEMP[2].x, TEMP[11].zzzz >602: ENDIF >603: ABS TEMP[1].x, TEMP[1].xxxx >604: ADD TEMP[1].x, -TEMP[2].xxxx, TEMP[1].xxxx >605: FSNE TEMP[4].x, TEMP[2].xxxx, IMM[2].wwww >606: UIF TEMP[4].xxxx :0 >607: RCP TEMP[2].x, TEMP[2].xxxx >608: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx >609: ELSE :0 >610: SSG TEMP[1].x, TEMP[1].xxxx >611: MUL TEMP[2].x, IMM[5].xxxx, TEMP[1].xxxx >612: ENDIF >613: MOV_SAT TEMP[1].x, TEMP[2].xxxx >614: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx >615: MUL TEMP[1].x, TEMP[1].xxxx, IMM[5].wwww >616: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx >617: MIN TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx >618: MOV TEMP[0].y, TEMP[1].xxxx >619: MOV TEMP[0].zw, IMM[9].wwzw >620: MOV OUT[0], TEMP[0] >621: END >radeonsi: Compiling shader 87 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 144) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 148) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 152) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 156) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 164) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 168) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 180) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 3 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > 
%55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 > %59 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 7 > %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 > %62 = extractelement <8 x i32> %58, i32 7 > %63 = extractelement <4 x i32> %61, i32 0 > %64 = and i32 %63, %62 > %65 = insertelement <4 x i32> %61, i32 %64, i32 0 > %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 > %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 11 > %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 > %71 = extractelement <8 x i32> %67, i32 7 > %72 = extractelement <4 x i32> %70, i32 0 > %73 = and i32 %72, %71 > %74 = insertelement <4 x i32> %70, i32 %73, i32 0 > %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 > %77 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %78 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %77, i64 0, i64 15 > %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 > %80 = extractelement <8 x i32> %76, i32 7 > %81 = extractelement <4 x i32> %79, i32 0 > %82 = and i32 %81, %80 > %83 = insertelement <4 x i32> %79, i32 %82, i32 0 > %84 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %93 = fmul float %88, %88 > %94 = fmul float %89, %89 > %95 = fadd float %94, %93 > %96 = fmul float %90, %90 > %97 = fadd float %95, %96 > %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) > %99 = fmul float %98, %88 > %100 = fmul float %98, %89 > %101 = fmul float %98, %90 > %102 = bitcast float %84 to i32 > %103 = bitcast float %85 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = call float @llvm.fma.f32(float %107, float 2.000000e+00, float -1.000000e+00) > %111 = call float @llvm.fma.f32(float %108, float 2.000000e+00, float -1.000000e+00) > %112 = call float @llvm.fma.f32(float %109, float 2.000000e+00, float -1.000000e+00) > %113 = fmul float %110, %110 > %114 = fmul float %111, %111 > %115 = fadd float 
%114, %113 > %116 = fmul float %112, %112 > %117 = fadd float %115, %116 > %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) > %119 = fmul float %118, %110 > %120 = fmul float %118, %111 > %121 = fmul float %118, %112 > %122 = fmul float %120, %40 > %123 = fmul float %120, %41 > %124 = fmul float %120, %42 > %125 = call float @llvm.fma.f32(float %119, float %37, float %122) > %126 = call float @llvm.fma.f32(float %119, float %38, float %123) > %127 = call float @llvm.fma.f32(float %119, float %39, float %124) > %128 = call float @llvm.fma.f32(float %121, float %43, float %125) > %129 = call float @llvm.fma.f32(float %121, float %44, float %126) > %130 = call float @llvm.fma.f32(float %121, float %45, float %127) > %131 = fmul float %128, %128 > %132 = fmul float %129, %129 > %133 = fadd float %132, %131 > %134 = fmul float %130, %130 > %135 = fadd float %133, %134 > %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) > %137 = fmul float %136, %128 > %138 = fmul float %136, %129 > %139 = fmul float %136, %130 > %140 = fmul float %99, %137 > %141 = fmul float %100, %138 > %142 = fadd float %141, %140 > %143 = fmul float %101, %139 > %144 = fadd float %142, %143 > %145 = fadd float %144, %144 > %146 = fsub float -0.000000e+00, %145 > %147 = call float @llvm.fma.f32(float %137, float %146, float %99) > %148 = fsub float -0.000000e+00, %145 > %149 = call float @llvm.fma.f32(float %138, float %148, float %100) > %150 = fsub float -0.000000e+00, %145 > %151 = call float @llvm.fma.f32(float %139, float %150, float %101) > %152 = call float @llvm.fma.f32(float %147, float 2.000000e+00, float %137) > %153 = call float @llvm.fma.f32(float %149, float 2.000000e+00, float %138) > %154 = call float @llvm.fma.f32(float %151, float 2.000000e+00, float %139) > %155 = fmul float %152, %152 > %156 = fmul float %153, %153 > %157 = fadd float %156, %155 > %158 = fmul float %154, %154 > %159 = fadd float %157, %158 > %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) > %161 = fmul float %160, %152 > %162 = fmul float %160, %153 > %163 = fmul float %160, %154 > %164 = fmul float %138, %163 > %165 = fmul float %139, %161 > %166 = fmul float %137, %162 > %167 = fsub float -0.000000e+00, %164 > %168 = call float @llvm.fma.f32(float %162, float %139, float %167) > %169 = fsub float -0.000000e+00, %165 > %170 = call float @llvm.fma.f32(float %163, float %137, float %169) > %171 = fsub float -0.000000e+00, %166 > %172 = call float @llvm.fma.f32(float %161, float %138, float %171) > %173 = fmul float %168, %168 > %174 = fmul float %170, %170 > %175 = fadd float %174, %173 > %176 = fmul float %172, %172 > %177 = fadd float %175, %176 > %178 = call float @llvm.AMDGPU.rsq.clamped.f32(float %177) > %179 = fmul float %178, %168 > %180 = fmul float %178, %170 > %181 = fmul float %178, %172 > %182 = fmul float %162, %181 > %183 = fmul float %163, %179 > %184 = fmul float %161, %180 > %185 = fsub float -0.000000e+00, %182 > %186 = call float @llvm.fma.f32(float %180, float %163, float %185) > %187 = fsub float -0.000000e+00, %183 > %188 = call float @llvm.fma.f32(float %181, float %161, float %187) > %189 = fsub float -0.000000e+00, %184 > %190 = call float @llvm.fma.f32(float %179, float %162, float %189) > %191 = bitcast float %86 to i32 > %192 = bitcast float %87 to i32 > %193 = insertelement <2 x i32> undef, i32 %191, i32 0 > %194 = insertelement <2 x i32> %193, i32 %192, i32 1 > %195 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %194, <8 x i32> %58, <4 x i32> %65, i32 15, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0) > %196 = extractelement <4 x float> %195, i32 0 > %197 = extractelement <4 x float> %195, i32 1 > %198 = call float @llvm.fma.f32(float %196, float 2.000000e+00, float -1.000000e+00) > %199 = call float @llvm.fma.f32(float %197, float 2.000000e+00, float -1.000000e+00) > %200 = fmul float %198, %198 > %201 = fmul float %199, %199 > %202 = fadd float %200, %201 > %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) > %204 = fmul float %203, %198 > %205 = fmul float %203, %199 > %206 = fmul float %204, 0xBFB99999A0000000 > %207 = fmul float %205, 0x3FB99999A0000000 > %208 = fadd float %206, %207 > %209 = fmul float %204, 0x3FD147AE20000000 > %210 = fmul float %205, 0x3F847AE140000000 > %211 = fadd float %209, %210 > %212 = call float @llvm.fma.f32(float %186, float %211, float %161) > %213 = call float @llvm.fma.f32(float %188, float %211, float %162) > %214 = call float @llvm.fma.f32(float %190, float %211, float %163) > %215 = call float @llvm.fma.f32(float %186, float %208, float %161) > %216 = call float @llvm.fma.f32(float %188, float %208, float %162) > %217 = call float @llvm.fma.f32(float %190, float %208, float %163) > %218 = fmul float %205, 0x3FB99999A0000000 > %219 = fmul float %204, 0x3FB99999A0000000 > %220 = fadd float %218, %219 > %221 = call float @llvm.fma.f32(float %179, float %220, float %215) > %222 = call float @llvm.fma.f32(float %180, float %220, float %216) > %223 = call float @llvm.fma.f32(float %181, float %220, float %217) > %224 = bitcast float %84 to i32 > %225 = bitcast float %85 to i32 > %226 = insertelement <2 x i32> undef, i32 %224, i32 0 > %227 = insertelement <2 x i32> %226, i32 %225, i32 1 > %228 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %227, <8 x i32> %67, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %229 = extractelement <4 x float> %228, i32 0 > %230 = fmul float %229, 0x3F70624DE0000000 > %231 = fmul float %229, 0x3F847AE140000000 > %232 = call float @llvm.maxnum.f32(float %230, float 1.000000e+00) > %233 = call float @llvm.maxnum.f32(float %231, float 1.000000e+00) > %234 = fmul float %232, 5.000000e-01 > %235 = fmul float %232, 2.500000e-01 > %236 = fmul float %232, 0x3FF4CCCCC0000000 > %237 = fmul float %232, 0x3FE4CCCCC0000000 > %238 = fmul float %229, %46 > %239 = fadd float %238, %47 > %240 = fmul float %239, %91 > %241 = fmul float %239, %92 > %242 = call float @llvm.fma.f32(float %221, float %234, float %240) > %243 = call float @llvm.fma.f32(float %222, float %234, float %241) > %244 = call float @llvm.fma.f32(float %223, float %234, float %229) > %245 = fmul float %25, %242 > %246 = fmul float %26, %243 > %247 = fadd float %245, %246 > %248 = fmul float %27, %244 > %249 = fadd float %247, %248 > %250 = fadd float %249, %28 > %251 = fmul float %29, %242 > %252 = fmul float %30, %243 > %253 = fadd float %251, %252 > %254 = fmul float %31, %244 > %255 = fadd float %253, %254 > %256 = fadd float %255, %32 > %257 = fmul float %33, %242 > %258 = fmul float %34, %243 > %259 = fadd float %257, %258 > %260 = fmul float %35, %244 > %261 = fadd float %259, %260 > %262 = fadd float %261, %36 > %263 = fcmp oeq float %262, 0.000000e+00 > %264 = fcmp oeq float %262, 0.000000e+00 > %265 = fcmp ogt float %250, 0.000000e+00 > %266 = select i1 %265, float 1.000000e+00, float %250 > %267 = fcmp oge float %266, 0.000000e+00 > %268 = fcmp ogt float %256, 0.000000e+00 > %269 = select i1 %268, float 1.000000e+00, float %256 > %270 = fcmp oge float %269, 0.000000e+00 > %.op = fmul 
float %266, 0x4600000000000000 > %271 = select i1 %267, float %.op, float 0xC600000000000000 > %.op194 = fmul float %269, 0x4600000000000000 > %272 = select i1 %270, float %.op194, float 0xC600000000000000 > %273 = fdiv float 1.000000e+00, %262 > %274 = fmul float %250, %273 > %275 = fmul float %256, %273 > %276 = select i1 %263, float %271, float %274 > %277 = select i1 %264, float %272, float %275 > %278 = call float @llvm.fma.f32(float %276, float 5.000000e-01, float 5.000000e-01) > %279 = call float @llvm.fma.f32(float %277, float -5.000000e-01, float 5.000000e-01) > %280 = bitcast float %278 to i32 > %281 = bitcast float %279 to i32 > %282 = insertelement <2 x i32> undef, i32 %280, i32 0 > %283 = insertelement <2 x i32> %282, i32 %281, i32 1 > %284 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %283, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %285 = extractelement <4 x float> %284, i32 0 > %286 = fsub float %244, %285 > %287 = fcmp olt float %286, 0.000000e+00 > %. = select i1 %287, float %235, float %234 > %288 = fmul float %., 5.000000e-01 > %289 = fcmp ogt float %163, 0.000000e+00 > %290 = fcmp ogt float %286, 0.000000e+00 > %291 = and i1 %289, %290 > %temp52.0 = select i1 %291, float %288, float %. > %292 = call float @llvm.fabs.f32(float %286) > %293 = fsub float %292, %temp52.0 > %294 = fcmp une float %temp52.0, 0.000000e+00 > br i1 %294, label %IF100, label %ELSE101 > >IF100: ; preds = %main_body > %295 = fdiv float 1.000000e+00, %temp52.0 > %296 = fmul float %293, %295 > br label %ENDIF99 > >ELSE101: ; preds = %main_body > %297 = fcmp ogt float %293, 0.000000e+00 > %298 = select i1 %297, float 1.000000e+00, float %293 > %299 = fcmp oge float %298, 0.000000e+00 > %.op195 = fmul float %298, 0x4600000000000000 > %300 = select i1 %299, float %.op195, float 0xC600000000000000 > br label %ENDIF99 > >ENDIF99: ; preds = %ELSE101, %IF100 > %temp48.1 = phi float [ %296, %IF100 ], [ %300, %ELSE101 ] > %301 = call float @llvm.AMDGPU.clamp.(float %temp48.1, float 0.000000e+00, float 1.000000e+00) > %302 = fmul float %205, 0x3FD3D70A40000000 > %303 = fmul float %204, 0xBFD3D70A40000000 > %304 = fmul float %205, 0x3FD3D70A40000000 > %305 = call float @llvm.fma.f32(float %186, float %303, float %161) > %306 = call float @llvm.fma.f32(float %188, float %303, float %162) > %307 = call float @llvm.fma.f32(float %190, float %303, float %163) > %308 = call float @llvm.fma.f32(float %179, float %302, float %305) > %309 = call float @llvm.fma.f32(float %180, float %302, float %306) > %310 = call float @llvm.fma.f32(float %181, float %302, float %307) > %311 = call float @llvm.fma.f32(float %308, float %236, float %240) > %312 = call float @llvm.fma.f32(float %309, float %236, float %241) > %313 = call float @llvm.fma.f32(float %310, float %236, float %229) > %314 = fmul float %25, %311 > %315 = fmul float %26, %312 > %316 = fadd float %314, %315 > %317 = fmul float %27, %313 > %318 = fadd float %316, %317 > %319 = fadd float %318, %28 > %320 = fmul float %29, %311 > %321 = fmul float %30, %312 > %322 = fadd float %320, %321 > %323 = fmul float %31, %313 > %324 = fadd float %322, %323 > %325 = fadd float %324, %32 > %326 = fmul float %33, %311 > %327 = fmul float %34, %312 > %328 = fadd float %326, %327 > %329 = fmul float %35, %313 > %330 = fadd float %328, %329 > %331 = fadd float %330, %36 > %332 = fcmp oeq float %331, 0.000000e+00 > %333 = fcmp oeq float %331, 0.000000e+00 > %334 = fcmp ogt float %319, 0.000000e+00 > %335 = select i1 %334, 
float 1.000000e+00, float %319 > %336 = fcmp oge float %335, 0.000000e+00 > %337 = fcmp ogt float %325, 0.000000e+00 > %338 = select i1 %337, float 1.000000e+00, float %325 > %339 = fcmp oge float %338, 0.000000e+00 > %.op196 = fmul float %335, 0x4600000000000000 > %340 = select i1 %336, float %.op196, float 0xC600000000000000 > %.op197 = fmul float %338, 0x4600000000000000 > %341 = select i1 %339, float %.op197, float 0xC600000000000000 > %342 = fdiv float 1.000000e+00, %331 > %343 = fmul float %319, %342 > %344 = fmul float %325, %342 > %345 = select i1 %332, float %340, float %343 > %346 = select i1 %333, float %341, float %344 > %347 = call float @llvm.fma.f32(float %345, float 5.000000e-01, float 5.000000e-01) > %348 = call float @llvm.fma.f32(float %346, float -5.000000e-01, float 5.000000e-01) > %349 = bitcast float %347 to i32 > %350 = bitcast float %348 to i32 > %351 = insertelement <2 x i32> undef, i32 %349, i32 0 > %352 = insertelement <2 x i32> %351, i32 %350, i32 1 > %353 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %352, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %354 = extractelement <4 x float> %353, i32 0 > %355 = fsub float %313, %354 > %356 = fcmp olt float %355, 0.000000e+00 > %.183 = select i1 %356, float %237, float %236 > %357 = fmul float %.183, 5.000000e-01 > %358 = fcmp ogt float %355, 0.000000e+00 > %359 = and i1 %289, %358 > %temp68.0 = select i1 %359, float %357, float %.183 > %360 = call float @llvm.fabs.f32(float %355) > %361 = fsub float %360, %temp68.0 > %362 = fcmp une float %temp68.0, 0.000000e+00 > br i1 %362, label %IF109, label %ELSE110 > >IF109: ; preds = %ENDIF99 > %363 = fdiv float 1.000000e+00, %temp68.0 > %364 = fmul float %361, %363 > br label %ENDIF108 > >ELSE110: ; preds = %ENDIF99 > %365 = fcmp ogt float %361, 0.000000e+00 > %366 = select i1 %365, float 1.000000e+00, float %361 > %367 = fcmp oge float %366, 0.000000e+00 > %.op198 = fmul float %366, 0x4600000000000000 > %368 = select i1 %367, float %.op198, float 0xC600000000000000 > br label %ENDIF108 > >ENDIF108: ; preds = %ELSE110, %IF109 > %temp64.1 = phi float [ %364, %IF109 ], [ %368, %ELSE110 ] > %369 = call float @llvm.AMDGPU.clamp.(float %temp64.1, float 0.000000e+00, float 1.000000e+00) > %370 = fadd float %369, %301 > %371 = fmul float %205, 0xBFD147AE20000000 > %372 = fmul float %204, 0x3F847AE140000000 > %373 = fadd float %371, %372 > %374 = call float @llvm.fma.f32(float %179, float %373, float %212) > %375 = call float @llvm.fma.f32(float %180, float %373, float %213) > %376 = call float @llvm.fma.f32(float %181, float %373, float %214) > %377 = fmul float %232, 0x4002666660000000 > %378 = fmul float %232, 0x3FF2666660000000 > %379 = fmul float %232, 0x40099999A0000000 > %380 = fmul float %232, 0x3FF99999A0000000 > %381 = call float @llvm.fma.f32(float %374, float %377, float %240) > %382 = call float @llvm.fma.f32(float %375, float %377, float %241) > %383 = call float @llvm.fma.f32(float %376, float %377, float %229) > %384 = fmul float %25, %381 > %385 = fmul float %26, %382 > %386 = fadd float %384, %385 > %387 = fmul float %27, %383 > %388 = fadd float %386, %387 > %389 = fadd float %388, %28 > %390 = fmul float %29, %381 > %391 = fmul float %30, %382 > %392 = fadd float %390, %391 > %393 = fmul float %31, %383 > %394 = fadd float %392, %393 > %395 = fadd float %394, %32 > %396 = fmul float %33, %381 > %397 = fmul float %34, %382 > %398 = fadd float %396, %397 > %399 = fmul float %35, %383 > %400 = fadd float %398, 
%399 > %401 = fadd float %400, %36 > %402 = fcmp oeq float %401, 0.000000e+00 > %403 = fcmp oeq float %401, 0.000000e+00 > %404 = fcmp ogt float %389, 0.000000e+00 > %405 = select i1 %404, float 1.000000e+00, float %389 > %406 = fcmp oge float %405, 0.000000e+00 > %407 = fcmp ogt float %395, 0.000000e+00 > %408 = select i1 %407, float 1.000000e+00, float %395 > %409 = fcmp oge float %408, 0.000000e+00 > %.op199 = fmul float %405, 0x4600000000000000 > %410 = select i1 %406, float %.op199, float 0xC600000000000000 > %.op200 = fmul float %408, 0x4600000000000000 > %411 = select i1 %409, float %.op200, float 0xC600000000000000 > %412 = fdiv float 1.000000e+00, %401 > %413 = fmul float %389, %412 > %414 = fmul float %395, %412 > %415 = select i1 %402, float %410, float %413 > %416 = select i1 %403, float %411, float %414 > %417 = call float @llvm.fma.f32(float %415, float 5.000000e-01, float 5.000000e-01) > %418 = call float @llvm.fma.f32(float %416, float -5.000000e-01, float 5.000000e-01) > %419 = bitcast float %417 to i32 > %420 = bitcast float %418 to i32 > %421 = insertelement <2 x i32> undef, i32 %419, i32 0 > %422 = insertelement <2 x i32> %421, i32 %420, i32 1 > %423 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %422, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %424 = extractelement <4 x float> %423, i32 0 > %425 = fsub float %383, %424 > %426 = fcmp olt float %425, 0.000000e+00 > %.184 = select i1 %426, float %378, float %377 > %427 = fmul float %.184, 5.000000e-01 > %428 = fcmp ogt float %425, 0.000000e+00 > %429 = and i1 %289, %428 > %temp68.1 = select i1 %429, float %427, float %.184 > %430 = call float @llvm.fabs.f32(float %425) > %431 = fsub float %430, %temp68.1 > %432 = fcmp une float %temp68.1, 0.000000e+00 > br i1 %432, label %IF118, label %ELSE119 > >IF118: ; preds = %ENDIF108 > %433 = fdiv float 1.000000e+00, %temp68.1 > %434 = fmul float %431, %433 > br label %ENDIF117 > >ELSE119: ; preds = %ENDIF108 > %435 = fcmp ogt float %431, 0.000000e+00 > %436 = select i1 %435, float 1.000000e+00, float %431 > %437 = fcmp oge float %436, 0.000000e+00 > %.op201 = fmul float %436, 0x4600000000000000 > %438 = select i1 %437, float %.op201, float 0xC600000000000000 > br label %ENDIF117 > >ENDIF117: ; preds = %ELSE119, %IF118 > %temp64.3 = phi float [ %434, %IF118 ], [ %438, %ELSE119 ] > %439 = call float @llvm.AMDGPU.clamp.(float %temp64.3, float 0.000000e+00, float 1.000000e+00) > %440 = fadd float %439, %370 > %441 = fmul float %205, 0xBF847AE140000000 > %442 = fmul float %204, 0xBFD3D70A40000000 > %443 = fadd float %441, %442 > %444 = fmul float %204, 0x3F847AE140000000 > %445 = fmul float %205, 0xBFD3D70A40000000 > %446 = fadd float %444, %445 > %447 = call float @llvm.fma.f32(float %186, float %446, float %161) > %448 = call float @llvm.fma.f32(float %188, float %446, float %162) > %449 = call float @llvm.fma.f32(float %190, float %446, float %163) > %450 = call float @llvm.fma.f32(float %179, float %443, float %447) > %451 = call float @llvm.fma.f32(float %180, float %443, float %448) > %452 = call float @llvm.fma.f32(float %181, float %443, float %449) > %453 = call float @llvm.fma.f32(float %450, float %379, float %240) > %454 = call float @llvm.fma.f32(float %451, float %379, float %241) > %455 = call float @llvm.fma.f32(float %452, float %379, float %229) > %456 = fmul float %25, %453 > %457 = fmul float %26, %454 > %458 = fadd float %456, %457 > %459 = fmul float %27, %455 > %460 = fadd float %458, %459 > %461 = fadd float 
%460, %28 > %462 = fmul float %29, %453 > %463 = fmul float %30, %454 > %464 = fadd float %462, %463 > %465 = fmul float %31, %455 > %466 = fadd float %464, %465 > %467 = fadd float %466, %32 > %468 = fmul float %33, %453 > %469 = fmul float %34, %454 > %470 = fadd float %468, %469 > %471 = fmul float %35, %455 > %472 = fadd float %470, %471 > %473 = fadd float %472, %36 > %474 = fcmp oeq float %473, 0.000000e+00 > %475 = fcmp oeq float %473, 0.000000e+00 > %476 = fcmp ogt float %461, 0.000000e+00 > %477 = select i1 %476, float 1.000000e+00, float %461 > %478 = fcmp oge float %477, 0.000000e+00 > %479 = fcmp ogt float %467, 0.000000e+00 > %480 = select i1 %479, float 1.000000e+00, float %467 > %481 = fcmp oge float %480, 0.000000e+00 > %.op202 = fmul float %477, 0x4600000000000000 > %482 = select i1 %478, float %.op202, float 0xC600000000000000 > %.op203 = fmul float %480, 0x4600000000000000 > %483 = select i1 %481, float %.op203, float 0xC600000000000000 > %484 = fdiv float 1.000000e+00, %473 > %485 = fmul float %461, %484 > %486 = fmul float %467, %484 > %487 = select i1 %474, float %482, float %485 > %488 = select i1 %475, float %483, float %486 > %489 = call float @llvm.fma.f32(float %487, float 5.000000e-01, float 5.000000e-01) > %490 = call float @llvm.fma.f32(float %488, float -5.000000e-01, float 5.000000e-01) > %491 = bitcast float %489 to i32 > %492 = bitcast float %490 to i32 > %493 = insertelement <2 x i32> undef, i32 %491, i32 0 > %494 = insertelement <2 x i32> %493, i32 %492, i32 1 > %495 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %494, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %496 = extractelement <4 x float> %495, i32 0 > %497 = fsub float %455, %496 > %498 = fcmp olt float %497, 0.000000e+00 > %.185 = select i1 %498, float %380, float %379 > %499 = fmul float %.185, 5.000000e-01 > %500 = fcmp ogt float %497, 0.000000e+00 > %501 = and i1 %289, %500 > %temp76.0 = select i1 %501, float %499, float %.185 > %502 = call float @llvm.fabs.f32(float %497) > %503 = fsub float %502, %temp76.0 > %504 = fcmp une float %temp76.0, 0.000000e+00 > br i1 %504, label %IF127, label %ELSE128 > >IF127: ; preds = %ENDIF117 > %505 = fdiv float 1.000000e+00, %temp76.0 > %506 = fmul float %503, %505 > br label %ENDIF126 > >ELSE128: ; preds = %ENDIF117 > %507 = fcmp ogt float %503, 0.000000e+00 > %508 = select i1 %507, float 1.000000e+00, float %503 > %509 = fcmp oge float %508, 0.000000e+00 > %.op204 = fmul float %508, 0x4600000000000000 > %510 = select i1 %509, float %.op204, float 0xC600000000000000 > br label %ENDIF126 > >ENDIF126: ; preds = %ELSE128, %IF127 > %temp72.1 = phi float [ %506, %IF127 ], [ %510, %ELSE128 ] > %511 = call float @llvm.AMDGPU.clamp.(float %temp72.1, float 0.000000e+00, float 1.000000e+00) > %512 = fadd float %440, %511 > %513 = fmul float %205, 0x3FA99999A0000000 > %514 = fmul float %204, 0xBFD851EB80000000 > %515 = fadd float %513, %514 > %516 = fmul float %205, 5.000000e-01 > %517 = fmul float %204, 5.000000e-01 > %518 = fadd float %516, %517 > %519 = fmul float %204, 0xBFA99999A0000000 > %520 = fmul float %205, 0xBFD851EB80000000 > %521 = fadd float %519, %520 > %522 = call float @llvm.fma.f32(float %186, float %521, float %161) > %523 = call float @llvm.fma.f32(float %188, float %521, float %162) > %524 = call float @llvm.fma.f32(float %190, float %521, float %163) > %525 = call float @llvm.fma.f32(float %179, float %515, float %522) > %526 = call float @llvm.fma.f32(float %180, float %515, float %523) > 
%527 = call float @llvm.fma.f32(float %181, float %515, float %524) > %528 = fmul float %232, 0x4010666660000000 > %529 = fmul float %232, 0x4000666660000000 > %530 = fmul float %232, 0x4015333340000000 > %531 = fmul float %232, 0x4005333340000000 > %532 = call float @llvm.fma.f32(float %525, float %528, float %240) > %533 = call float @llvm.fma.f32(float %526, float %528, float %241) > %534 = call float @llvm.fma.f32(float %527, float %528, float %229) > %535 = fmul float %25, %532 > %536 = fmul float %26, %533 > %537 = fadd float %535, %536 > %538 = fmul float %27, %534 > %539 = fadd float %537, %538 > %540 = fadd float %539, %28 > %541 = fmul float %29, %532 > %542 = fmul float %30, %533 > %543 = fadd float %541, %542 > %544 = fmul float %31, %534 > %545 = fadd float %543, %544 > %546 = fadd float %545, %32 > %547 = fmul float %33, %532 > %548 = fmul float %34, %533 > %549 = fadd float %547, %548 > %550 = fmul float %35, %534 > %551 = fadd float %549, %550 > %552 = fadd float %551, %36 > %553 = fcmp oeq float %552, 0.000000e+00 > %554 = fcmp oeq float %552, 0.000000e+00 > %555 = fcmp ogt float %540, 0.000000e+00 > %556 = select i1 %555, float 1.000000e+00, float %540 > %557 = fcmp oge float %556, 0.000000e+00 > %558 = fcmp ogt float %546, 0.000000e+00 > %559 = select i1 %558, float 1.000000e+00, float %546 > %560 = fcmp oge float %559, 0.000000e+00 > %.op205 = fmul float %556, 0x4600000000000000 > %561 = select i1 %557, float %.op205, float 0xC600000000000000 > %.op206 = fmul float %559, 0x4600000000000000 > %562 = select i1 %560, float %.op206, float 0xC600000000000000 > %563 = fdiv float 1.000000e+00, %552 > %564 = fmul float %540, %563 > %565 = fmul float %546, %563 > %566 = select i1 %553, float %561, float %564 > %567 = select i1 %554, float %562, float %565 > %568 = call float @llvm.fma.f32(float %566, float 5.000000e-01, float 5.000000e-01) > %569 = call float @llvm.fma.f32(float %567, float -5.000000e-01, float 5.000000e-01) > %570 = bitcast float %568 to i32 > %571 = bitcast float %569 to i32 > %572 = insertelement <2 x i32> undef, i32 %570, i32 0 > %573 = insertelement <2 x i32> %572, i32 %571, i32 1 > %574 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %573, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %575 = extractelement <4 x float> %574, i32 0 > %576 = fsub float %534, %575 > %577 = fcmp ogt float %576, 0.000000e+00 > %578 = and i1 %289, %577 > %579 = fcmp olt float %576, 0.000000e+00 > %.186 = select i1 %579, float %529, float %528 > %580 = fmul float %.186, 5.000000e-01 > %temp80.0 = select i1 %578, float %580, float %.186 > %581 = call float @llvm.fabs.f32(float %576) > %582 = fsub float %581, %temp80.0 > %583 = fcmp une float %temp80.0, 0.000000e+00 > br i1 %583, label %IF136, label %ELSE137 > >IF136: ; preds = %ENDIF126 > %584 = fdiv float 1.000000e+00, %temp80.0 > %585 = fmul float %582, %584 > br label %ENDIF135 > >ELSE137: ; preds = %ENDIF126 > %586 = fcmp ogt float %582, 0.000000e+00 > %587 = select i1 %586, float 1.000000e+00, float %582 > %588 = fcmp oge float %587, 0.000000e+00 > %.op207 = fmul float %587, 0x4600000000000000 > %589 = select i1 %588, float %.op207, float 0xC600000000000000 > br label %ENDIF135 > >ENDIF135: ; preds = %ELSE137, %IF136 > %temp80.1 = phi float [ %585, %IF136 ], [ %589, %ELSE137 ] > %590 = call float @llvm.AMDGPU.clamp.(float %temp80.1, float 0.000000e+00, float 1.000000e+00) > %591 = fadd float %512, %590 > %592 = fmul float %204, -5.000000e-01 > %593 = fmul float %205, 
5.000000e-01 > %594 = fadd float %592, %593 > %595 = call float @llvm.fma.f32(float %186, float %594, float %161) > %596 = call float @llvm.fma.f32(float %188, float %594, float %162) > %597 = call float @llvm.fma.f32(float %190, float %594, float %163) > %598 = call float @llvm.fma.f32(float %179, float %518, float %595) > %599 = call float @llvm.fma.f32(float %180, float %518, float %596) > %600 = call float @llvm.fma.f32(float %181, float %518, float %597) > %601 = call float @llvm.fma.f32(float %598, float %530, float %240) > %602 = call float @llvm.fma.f32(float %599, float %530, float %241) > %603 = call float @llvm.fma.f32(float %600, float %530, float %229) > %604 = fmul float %25, %601 > %605 = fmul float %26, %602 > %606 = fadd float %604, %605 > %607 = fmul float %27, %603 > %608 = fadd float %606, %607 > %609 = fadd float %608, %28 > %610 = fmul float %29, %601 > %611 = fmul float %30, %602 > %612 = fadd float %610, %611 > %613 = fmul float %31, %603 > %614 = fadd float %612, %613 > %615 = fadd float %614, %32 > %616 = fmul float %33, %601 > %617 = fmul float %34, %602 > %618 = fadd float %616, %617 > %619 = fmul float %35, %603 > %620 = fadd float %618, %619 > %621 = fadd float %620, %36 > %622 = fcmp oeq float %621, 0.000000e+00 > %623 = fcmp oeq float %621, 0.000000e+00 > %624 = fcmp ogt float %609, 0.000000e+00 > %625 = select i1 %624, float 1.000000e+00, float %609 > %626 = fcmp oge float %625, 0.000000e+00 > %627 = fcmp ogt float %615, 0.000000e+00 > %628 = select i1 %627, float 1.000000e+00, float %615 > %629 = fcmp oge float %628, 0.000000e+00 > %.op208 = fmul float %625, 0x4600000000000000 > %630 = select i1 %626, float %.op208, float 0xC600000000000000 > %.op209 = fmul float %628, 0x4600000000000000 > %631 = select i1 %629, float %.op209, float 0xC600000000000000 > %632 = fdiv float 1.000000e+00, %621 > %633 = fmul float %609, %632 > %634 = fmul float %615, %632 > %635 = select i1 %622, float %630, float %633 > %636 = select i1 %623, float %631, float %634 > %637 = call float @llvm.fma.f32(float %635, float 5.000000e-01, float 5.000000e-01) > %638 = call float @llvm.fma.f32(float %636, float -5.000000e-01, float 5.000000e-01) > %639 = bitcast float %637 to i32 > %640 = bitcast float %638 to i32 > %641 = insertelement <2 x i32> undef, i32 %639, i32 0 > %642 = insertelement <2 x i32> %641, i32 %640, i32 1 > %643 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %642, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %644 = extractelement <4 x float> %643, i32 0 > %645 = fsub float %603, %644 > %646 = fcmp ogt float %645, 0.000000e+00 > %647 = and i1 %289, %646 > %648 = fcmp olt float %645, 0.000000e+00 > %.187 = select i1 %648, float %531, float %530 > %649 = fmul float %.187, 5.000000e-01 > %temp52.1 = select i1 %647, float %649, float %.187 > %650 = call float @llvm.fabs.f32(float %645) > %651 = fsub float %650, %temp52.1 > %652 = fcmp une float %temp52.1, 0.000000e+00 > br i1 %652, label %IF145, label %ELSE146 > >IF145: ; preds = %ENDIF135 > %653 = fdiv float 1.000000e+00, %temp52.1 > %654 = fmul float %651, %653 > br label %ENDIF144 > >ELSE146: ; preds = %ENDIF135 > %655 = fcmp ogt float %651, 0.000000e+00 > %656 = select i1 %655, float 1.000000e+00, float %651 > %657 = fcmp oge float %656, 0.000000e+00 > %.op210 = fmul float %656, 0x4600000000000000 > %658 = select i1 %657, float %.op210, float 0xC600000000000000 > br label %ENDIF144 > >ENDIF144: ; preds = %ELSE146, %IF145 > %temp52.2 = phi float [ %654, %IF145 ], [ 
%658, %ELSE146 ] > %659 = call float @llvm.AMDGPU.clamp.(float %temp52.2, float 0.000000e+00, float 1.000000e+00) > %660 = fadd float %659, %591 > %661 = fmul float %660, 0x3FC5555560000000 > %662 = fmul float %661, %661 > %663 = fmul float %139, %180 > %664 = fmul float %137, %181 > %665 = fmul float %138, %179 > %666 = fsub float -0.000000e+00, %663 > %667 = call float @llvm.fma.f32(float %138, float %181, float %666) > %668 = fsub float -0.000000e+00, %664 > %669 = call float @llvm.fma.f32(float %139, float %179, float %668) > %670 = fsub float -0.000000e+00, %665 > %671 = call float @llvm.fma.f32(float %137, float %180, float %670) > %672 = call float @llvm.fma.f32(float %667, float %303, float %137) > %673 = call float @llvm.fma.f32(float %669, float %303, float %138) > %674 = call float @llvm.fma.f32(float %671, float %303, float %139) > %675 = call float @llvm.fma.f32(float %179, float %304, float %672) > %676 = call float @llvm.fma.f32(float %180, float %304, float %673) > %677 = call float @llvm.fma.f32(float %181, float %304, float %674) > %678 = fmul float %233, 5.000000e-01 > %679 = fmul float %233, 2.500000e-01 > %680 = fmul float %233, 0x3FF4CCCCC0000000 > %681 = fmul float %233, 0x3FE4CCCCC0000000 > %682 = call float @llvm.fma.f32(float %675, float %680, float %240) > %683 = call float @llvm.fma.f32(float %676, float %680, float %241) > %684 = call float @llvm.fma.f32(float %677, float %680, float %229) > %685 = fmul float %25, %682 > %686 = fmul float %26, %683 > %687 = fadd float %685, %686 > %688 = fmul float %27, %684 > %689 = fadd float %687, %688 > %690 = fadd float %689, %28 > %691 = fmul float %29, %682 > %692 = fmul float %30, %683 > %693 = fadd float %691, %692 > %694 = fmul float %31, %684 > %695 = fadd float %693, %694 > %696 = fadd float %695, %32 > %697 = fmul float %33, %682 > %698 = fmul float %34, %683 > %699 = fadd float %697, %698 > %700 = fmul float %35, %684 > %701 = fadd float %699, %700 > %702 = fadd float %701, %36 > %703 = fcmp oeq float %702, 0.000000e+00 > %704 = fcmp oeq float %702, 0.000000e+00 > %705 = fcmp ogt float %690, 0.000000e+00 > %706 = select i1 %705, float 1.000000e+00, float %690 > %707 = fcmp oge float %706, 0.000000e+00 > %708 = fcmp ogt float %696, 0.000000e+00 > %709 = select i1 %708, float 1.000000e+00, float %696 > %710 = fcmp oge float %709, 0.000000e+00 > %.op211 = fmul float %706, 0x4600000000000000 > %711 = select i1 %707, float %.op211, float 0xC600000000000000 > %.op212 = fmul float %709, 0x4600000000000000 > %712 = select i1 %710, float %.op212, float 0xC600000000000000 > %713 = fdiv float 1.000000e+00, %702 > %714 = fmul float %690, %713 > %715 = fmul float %696, %713 > %716 = select i1 %703, float %711, float %714 > %717 = select i1 %704, float %712, float %715 > %718 = call float @llvm.fma.f32(float %716, float 5.000000e-01, float 5.000000e-01) > %719 = call float @llvm.fma.f32(float %717, float -5.000000e-01, float 5.000000e-01) > %720 = bitcast float %718 to i32 > %721 = bitcast float %719 to i32 > %722 = insertelement <2 x i32> undef, i32 %720, i32 0 > %723 = insertelement <2 x i32> %722, i32 %721, i32 1 > %724 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %723, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %725 = extractelement <4 x float> %724, i32 0 > %726 = fsub float %684, %725 > %727 = fcmp olt float %726, 0.000000e+00 > %.188 = select i1 %727, float %681, float %680 > %728 = call float @llvm.fabs.f32(float %726) > %729 = fsub float %728, %.188 > %730 = 
fcmp une float %.188, 0.000000e+00 > br i1 %730, label %IF151, label %ELSE152 > >IF151: ; preds = %ENDIF144 > %731 = fdiv float 1.000000e+00, %.188 > %732 = fmul float %729, %731 > br label %ENDIF150 > >ELSE152: ; preds = %ENDIF144 > %733 = fcmp ogt float %729, 0.000000e+00 > %734 = select i1 %733, float 1.000000e+00, float %729 > %735 = fcmp oge float %734, 0.000000e+00 > %.op213 = fmul float %734, 0x4600000000000000 > %736 = select i1 %735, float %.op213, float 0xC600000000000000 > br label %ENDIF150 > >ENDIF150: ; preds = %ELSE152, %IF151 > %temp56.1 = phi float [ %732, %IF151 ], [ %736, %ELSE152 ] > %737 = call float @llvm.AMDGPU.clamp.(float %temp56.1, float 0.000000e+00, float 1.000000e+00) > %738 = fmul float %204, 0xBFB99999A0000000 > %739 = fmul float %205, 0x3FB99999A0000000 > %740 = fadd float %738, %739 > %741 = fmul float %204, 0x3FD147AE20000000 > %742 = fmul float %205, 0x3F847AE140000000 > %743 = fadd float %741, %742 > %744 = call float @llvm.fma.f32(float %667, float %743, float %137) > %745 = call float @llvm.fma.f32(float %669, float %743, float %138) > %746 = call float @llvm.fma.f32(float %671, float %743, float %139) > %747 = call float @llvm.fma.f32(float %179, float %373, float %744) > %748 = call float @llvm.fma.f32(float %180, float %373, float %745) > %749 = call float @llvm.fma.f32(float %181, float %373, float %746) > %750 = call float @llvm.fma.f32(float %667, float %740, float %137) > %751 = call float @llvm.fma.f32(float %669, float %740, float %138) > %752 = call float @llvm.fma.f32(float %671, float %740, float %139) > %753 = call float @llvm.fma.f32(float %179, float %220, float %750) > %754 = call float @llvm.fma.f32(float %180, float %220, float %751) > %755 = call float @llvm.fma.f32(float %181, float %220, float %752) > %756 = call float @llvm.fma.f32(float %753, float %678, float %240) > %757 = call float @llvm.fma.f32(float %754, float %678, float %241) > %758 = call float @llvm.fma.f32(float %755, float %678, float %229) > %759 = fmul float %25, %756 > %760 = fmul float %26, %757 > %761 = fadd float %759, %760 > %762 = fmul float %27, %758 > %763 = fadd float %761, %762 > %764 = fadd float %763, %28 > %765 = fmul float %29, %756 > %766 = fmul float %30, %757 > %767 = fadd float %765, %766 > %768 = fmul float %31, %758 > %769 = fadd float %767, %768 > %770 = fadd float %769, %32 > %771 = fmul float %33, %756 > %772 = fmul float %34, %757 > %773 = fadd float %771, %772 > %774 = fmul float %35, %758 > %775 = fadd float %773, %774 > %776 = fadd float %775, %36 > %777 = fcmp oeq float %776, 0.000000e+00 > %778 = fcmp oeq float %776, 0.000000e+00 > %779 = fcmp ogt float %764, 0.000000e+00 > %780 = select i1 %779, float 1.000000e+00, float %764 > %781 = fcmp oge float %780, 0.000000e+00 > %782 = fcmp ogt float %770, 0.000000e+00 > %783 = select i1 %782, float 1.000000e+00, float %770 > %784 = fcmp oge float %783, 0.000000e+00 > %.op214 = fmul float %780, 0x4600000000000000 > %785 = select i1 %781, float %.op214, float 0xC600000000000000 > %.op215 = fmul float %783, 0x4600000000000000 > %786 = select i1 %784, float %.op215, float 0xC600000000000000 > %787 = fdiv float 1.000000e+00, %776 > %788 = fmul float %764, %787 > %789 = fmul float %770, %787 > %790 = select i1 %777, float %785, float %788 > %791 = select i1 %778, float %786, float %789 > %792 = call float @llvm.fma.f32(float %790, float 5.000000e-01, float 5.000000e-01) > %793 = call float @llvm.fma.f32(float %791, float -5.000000e-01, float 5.000000e-01) > %794 = bitcast float %792 to i32 > %795 = 
bitcast float %793 to i32 > %796 = insertelement <2 x i32> undef, i32 %794, i32 0 > %797 = insertelement <2 x i32> %796, i32 %795, i32 1 > %798 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %797, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %799 = extractelement <4 x float> %798, i32 0 > %800 = fsub float %758, %799 > %801 = fcmp olt float %800, 0.000000e+00 > %.189 = select i1 %801, float %679, float %678 > %802 = call float @llvm.fabs.f32(float %800) > %803 = fsub float %802, %.189 > %804 = fcmp une float %.189, 0.000000e+00 > br i1 %804, label %IF157, label %ELSE158 > >IF157: ; preds = %ENDIF150 > %805 = fdiv float 1.000000e+00, %.189 > %806 = fmul float %803, %805 > br label %ENDIF156 > >ELSE158: ; preds = %ENDIF150 > %807 = fcmp ogt float %803, 0.000000e+00 > %808 = select i1 %807, float 1.000000e+00, float %803 > %809 = fcmp oge float %808, 0.000000e+00 > %.op216 = fmul float %808, 0x4600000000000000 > %810 = select i1 %809, float %.op216, float 0xC600000000000000 > br label %ENDIF156 > >ENDIF156: ; preds = %ELSE158, %IF157 > %temp48.3 = phi float [ %806, %IF157 ], [ %810, %ELSE158 ] > %811 = call float @llvm.AMDGPU.clamp.(float %temp48.3, float 0.000000e+00, float 1.000000e+00) > %812 = fadd float %737, %811 > %813 = fmul float %233, 0x4002666660000000 > %814 = fmul float %233, 0x3FF2666660000000 > %815 = fmul float %233, 0x40099999A0000000 > %816 = fmul float %233, 0x3FF99999A0000000 > %817 = fmul float %233, 0x4010666660000000 > %818 = fmul float %233, 0x4000666660000000 > %819 = fmul float %233, 0x4015333340000000 > %820 = fmul float %233, 0x4005333340000000 > %821 = call float @llvm.fma.f32(float %747, float %813, float %240) > %822 = call float @llvm.fma.f32(float %748, float %813, float %241) > %823 = call float @llvm.fma.f32(float %749, float %813, float %229) > %824 = fmul float %25, %821 > %825 = fmul float %26, %822 > %826 = fadd float %824, %825 > %827 = fmul float %27, %823 > %828 = fadd float %826, %827 > %829 = fadd float %828, %28 > %830 = fmul float %29, %821 > %831 = fmul float %30, %822 > %832 = fadd float %830, %831 > %833 = fmul float %31, %823 > %834 = fadd float %832, %833 > %835 = fadd float %834, %32 > %836 = fmul float %33, %821 > %837 = fmul float %34, %822 > %838 = fadd float %836, %837 > %839 = fmul float %35, %823 > %840 = fadd float %838, %839 > %841 = fadd float %840, %36 > %842 = fcmp oeq float %841, 0.000000e+00 > %843 = fcmp oeq float %841, 0.000000e+00 > %844 = fcmp ogt float %829, 0.000000e+00 > %845 = select i1 %844, float 1.000000e+00, float %829 > %846 = fcmp oge float %845, 0.000000e+00 > %847 = fcmp ogt float %835, 0.000000e+00 > %848 = select i1 %847, float 1.000000e+00, float %835 > %849 = fcmp oge float %848, 0.000000e+00 > %.op217 = fmul float %845, 0x4600000000000000 > %850 = select i1 %846, float %.op217, float 0xC600000000000000 > %.op218 = fmul float %848, 0x4600000000000000 > %851 = select i1 %849, float %.op218, float 0xC600000000000000 > %852 = fdiv float 1.000000e+00, %841 > %853 = fmul float %829, %852 > %854 = fmul float %835, %852 > %855 = select i1 %842, float %850, float %853 > %856 = select i1 %843, float %851, float %854 > %857 = call float @llvm.fma.f32(float %855, float 5.000000e-01, float 5.000000e-01) > %858 = call float @llvm.fma.f32(float %856, float -5.000000e-01, float 5.000000e-01) > %859 = bitcast float %857 to i32 > %860 = bitcast float %858 to i32 > %861 = insertelement <2 x i32> undef, i32 %859, i32 0 > %862 = insertelement <2 x i32> %861, i32 %860, i32 1 > 
%863 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %862, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %864 = extractelement <4 x float> %863, i32 0 > %865 = fsub float %823, %864 > %866 = fcmp olt float %865, 0.000000e+00 > %.190 = select i1 %866, float %814, float %813 > %867 = call float @llvm.fabs.f32(float %865) > %868 = fsub float %867, %.190 > %869 = fcmp une float %.190, 0.000000e+00 > br i1 %869, label %IF163, label %ELSE164 > >IF163: ; preds = %ENDIF156 > %870 = fdiv float 1.000000e+00, %.190 > %871 = fmul float %868, %870 > br label %ENDIF162 > >ELSE164: ; preds = %ENDIF156 > %872 = fcmp ogt float %868, 0.000000e+00 > %873 = select i1 %872, float 1.000000e+00, float %868 > %874 = fcmp oge float %873, 0.000000e+00 > %.op219 = fmul float %873, 0x4600000000000000 > %875 = select i1 %874, float %.op219, float 0xC600000000000000 > br label %ENDIF162 > >ENDIF162: ; preds = %ELSE164, %IF163 > %temp48.5 = phi float [ %871, %IF163 ], [ %875, %ELSE164 ] > %876 = call float @llvm.AMDGPU.clamp.(float %temp48.5, float 0.000000e+00, float 1.000000e+00) > %877 = fadd float %876, %812 > %878 = call float @llvm.fma.f32(float %667, float %446, float %137) > %879 = call float @llvm.fma.f32(float %669, float %446, float %138) > %880 = call float @llvm.fma.f32(float %671, float %446, float %139) > %881 = call float @llvm.fma.f32(float %179, float %443, float %878) > %882 = call float @llvm.fma.f32(float %180, float %443, float %879) > %883 = call float @llvm.fma.f32(float %181, float %443, float %880) > %884 = call float @llvm.fma.f32(float %881, float %815, float %240) > %885 = call float @llvm.fma.f32(float %882, float %815, float %241) > %886 = call float @llvm.fma.f32(float %883, float %815, float %229) > %887 = fmul float %25, %884 > %888 = fmul float %26, %885 > %889 = fadd float %887, %888 > %890 = fmul float %27, %886 > %891 = fadd float %889, %890 > %892 = fadd float %891, %28 > %893 = fmul float %29, %884 > %894 = fmul float %30, %885 > %895 = fadd float %893, %894 > %896 = fmul float %31, %886 > %897 = fadd float %895, %896 > %898 = fadd float %897, %32 > %899 = fmul float %33, %884 > %900 = fmul float %34, %885 > %901 = fadd float %899, %900 > %902 = fmul float %35, %886 > %903 = fadd float %901, %902 > %904 = fadd float %903, %36 > %905 = fcmp oeq float %904, 0.000000e+00 > %906 = fcmp oeq float %904, 0.000000e+00 > %907 = fcmp ogt float %892, 0.000000e+00 > %908 = select i1 %907, float 1.000000e+00, float %892 > %909 = fcmp oge float %908, 0.000000e+00 > %910 = fcmp ogt float %898, 0.000000e+00 > %911 = select i1 %910, float 1.000000e+00, float %898 > %912 = fcmp oge float %911, 0.000000e+00 > %.op220 = fmul float %908, 0x4600000000000000 > %913 = select i1 %909, float %.op220, float 0xC600000000000000 > %.op221 = fmul float %911, 0x4600000000000000 > %914 = select i1 %912, float %.op221, float 0xC600000000000000 > %915 = fdiv float 1.000000e+00, %904 > %916 = fmul float %892, %915 > %917 = fmul float %898, %915 > %918 = select i1 %905, float %913, float %916 > %919 = select i1 %906, float %914, float %917 > %920 = call float @llvm.fma.f32(float %918, float 5.000000e-01, float 5.000000e-01) > %921 = call float @llvm.fma.f32(float %919, float -5.000000e-01, float 5.000000e-01) > %922 = bitcast float %920 to i32 > %923 = bitcast float %921 to i32 > %924 = insertelement <2 x i32> undef, i32 %922, i32 0 > %925 = insertelement <2 x i32> %924, i32 %923, i32 1 > %926 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %925, <8 x i32> 
%76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %927 = extractelement <4 x float> %926, i32 0 > %928 = fsub float %886, %927 > %929 = fcmp olt float %928, 0.000000e+00 > %.191 = select i1 %929, float %816, float %815 > %930 = call float @llvm.fabs.f32(float %928) > %931 = fsub float %930, %.191 > %932 = fcmp une float %.191, 0.000000e+00 > br i1 %932, label %IF169, label %ELSE170 > >IF169: ; preds = %ENDIF162 > %933 = fdiv float 1.000000e+00, %.191 > %934 = fmul float %931, %933 > br label %ENDIF168 > >ELSE170: ; preds = %ENDIF162 > %935 = fcmp ogt float %931, 0.000000e+00 > %936 = select i1 %935, float 1.000000e+00, float %931 > %937 = fcmp oge float %936, 0.000000e+00 > %.op222 = fmul float %936, 0x4600000000000000 > %938 = select i1 %937, float %.op222, float 0xC600000000000000 > br label %ENDIF168 > >ENDIF168: ; preds = %ELSE170, %IF169 > %temp28.0 = phi float [ %934, %IF169 ], [ %938, %ELSE170 ] > %939 = call float @llvm.AMDGPU.clamp.(float %temp28.0, float 0.000000e+00, float 1.000000e+00) > %940 = fadd float %939, %877 > %941 = call float @llvm.fma.f32(float %667, float %521, float %137) > %942 = call float @llvm.fma.f32(float %669, float %521, float %138) > %943 = call float @llvm.fma.f32(float %671, float %521, float %139) > %944 = call float @llvm.fma.f32(float %667, float %594, float %137) > %945 = call float @llvm.fma.f32(float %669, float %594, float %138) > %946 = call float @llvm.fma.f32(float %671, float %594, float %139) > %947 = call float @llvm.fma.f32(float %179, float %518, float %944) > %948 = call float @llvm.fma.f32(float %180, float %518, float %945) > %949 = call float @llvm.fma.f32(float %181, float %518, float %946) > %950 = call float @llvm.fma.f32(float %179, float %515, float %941) > %951 = call float @llvm.fma.f32(float %180, float %515, float %942) > %952 = call float @llvm.fma.f32(float %181, float %515, float %943) > %953 = call float @llvm.fma.f32(float %950, float %817, float %240) > %954 = call float @llvm.fma.f32(float %951, float %817, float %241) > %955 = call float @llvm.fma.f32(float %952, float %817, float %229) > %956 = call float @llvm.fma.f32(float %947, float %819, float %240) > %957 = call float @llvm.fma.f32(float %948, float %819, float %241) > %958 = call float @llvm.fma.f32(float %949, float %819, float %229) > %959 = fmul float %25, %953 > %960 = fmul float %26, %954 > %961 = fadd float %959, %960 > %962 = fmul float %27, %955 > %963 = fadd float %961, %962 > %964 = fadd float %963, %28 > %965 = fmul float %29, %953 > %966 = fmul float %30, %954 > %967 = fadd float %965, %966 > %968 = fmul float %31, %955 > %969 = fadd float %967, %968 > %970 = fadd float %969, %32 > %971 = fmul float %33, %953 > %972 = fmul float %34, %954 > %973 = fadd float %971, %972 > %974 = fmul float %35, %955 > %975 = fadd float %973, %974 > %976 = fadd float %975, %36 > %977 = fcmp oeq float %976, 0.000000e+00 > %978 = fcmp oeq float %976, 0.000000e+00 > %979 = fcmp ogt float %964, 0.000000e+00 > %980 = select i1 %979, float 1.000000e+00, float %964 > %981 = fcmp oge float %980, 0.000000e+00 > %982 = fcmp ogt float %970, 0.000000e+00 > %983 = select i1 %982, float 1.000000e+00, float %970 > %984 = fcmp oge float %983, 0.000000e+00 > %.op223 = fmul float %980, 0x4600000000000000 > %985 = select i1 %981, float %.op223, float 0xC600000000000000 > %.op224 = fmul float %983, 0x4600000000000000 > %986 = select i1 %984, float %.op224, float 0xC600000000000000 > %987 = fdiv float 1.000000e+00, %976 > %988 = fmul float %964, %987 > %989 = 
fmul float %970, %987 > %990 = select i1 %977, float %985, float %988 > %991 = select i1 %978, float %986, float %989 > %992 = call float @llvm.fma.f32(float %990, float 5.000000e-01, float 5.000000e-01) > %993 = call float @llvm.fma.f32(float %991, float -5.000000e-01, float 5.000000e-01) > %994 = bitcast float %992 to i32 > %995 = bitcast float %993 to i32 > %996 = insertelement <2 x i32> undef, i32 %994, i32 0 > %997 = insertelement <2 x i32> %996, i32 %995, i32 1 > %998 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %997, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %999 = extractelement <4 x float> %998, i32 0 > %1000 = fsub float %955, %999 > %1001 = fcmp olt float %1000, 0.000000e+00 > %.192 = select i1 %1001, float %818, float %817 > %1002 = call float @llvm.fabs.f32(float %1000) > %1003 = fsub float %1002, %.192 > %1004 = fcmp une float %.192, 0.000000e+00 > br i1 %1004, label %IF175, label %ELSE176 > >IF175: ; preds = %ENDIF168 > %1005 = fdiv float 1.000000e+00, %.192 > %1006 = fmul float %1003, %1005 > br label %ENDIF174 > >ELSE176: ; preds = %ENDIF168 > %1007 = fcmp ogt float %1003, 0.000000e+00 > %1008 = select i1 %1007, float 1.000000e+00, float %1003 > %1009 = fcmp oge float %1008, 0.000000e+00 > %.op225 = fmul float %1008, 0x4600000000000000 > %1010 = select i1 %1009, float %.op225, float 0xC600000000000000 > br label %ENDIF174 > >ENDIF174: ; preds = %ELSE176, %IF175 > %temp16.1 = phi float [ %1006, %IF175 ], [ %1010, %ELSE176 ] > %1011 = call float @llvm.AMDGPU.clamp.(float %temp16.1, float 0.000000e+00, float 1.000000e+00) > %1012 = fadd float %1011, %940 > %1013 = fmul float %25, %956 > %1014 = fmul float %26, %957 > %1015 = fadd float %1013, %1014 > %1016 = fmul float %27, %958 > %1017 = fadd float %1015, %1016 > %1018 = fadd float %1017, %28 > %1019 = fmul float %29, %956 > %1020 = fmul float %30, %957 > %1021 = fadd float %1019, %1020 > %1022 = fmul float %31, %958 > %1023 = fadd float %1021, %1022 > %1024 = fadd float %1023, %32 > %1025 = fmul float %33, %956 > %1026 = fmul float %34, %957 > %1027 = fadd float %1025, %1026 > %1028 = fmul float %35, %958 > %1029 = fadd float %1027, %1028 > %1030 = fadd float %1029, %36 > %1031 = fcmp oeq float %1030, 0.000000e+00 > %1032 = fcmp oeq float %1030, 0.000000e+00 > %1033 = fcmp ogt float %1018, 0.000000e+00 > %1034 = select i1 %1033, float 1.000000e+00, float %1018 > %1035 = fcmp oge float %1034, 0.000000e+00 > %1036 = fcmp ogt float %1024, 0.000000e+00 > %1037 = select i1 %1036, float 1.000000e+00, float %1024 > %1038 = fcmp oge float %1037, 0.000000e+00 > %.op226 = fmul float %1034, 0x4600000000000000 > %1039 = select i1 %1035, float %.op226, float 0xC600000000000000 > %.op227 = fmul float %1037, 0x4600000000000000 > %1040 = select i1 %1038, float %.op227, float 0xC600000000000000 > %1041 = fdiv float 1.000000e+00, %1030 > %1042 = fmul float %1018, %1041 > %1043 = fmul float %1024, %1041 > %1044 = select i1 %1031, float %1039, float %1042 > %1045 = select i1 %1032, float %1040, float %1043 > %1046 = call float @llvm.fma.f32(float %1044, float 5.000000e-01, float 5.000000e-01) > %1047 = call float @llvm.fma.f32(float %1045, float -5.000000e-01, float 5.000000e-01) > %1048 = bitcast float %1046 to i32 > %1049 = bitcast float %1047 to i32 > %1050 = insertelement <2 x i32> undef, i32 %1048, i32 0 > %1051 = insertelement <2 x i32> %1050, i32 %1049, i32 1 > %1052 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1051, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1053 = extractelement <4 x float> %1052, i32 0 > %1054 = fsub float %958, %1053 > %1055 = fcmp olt float %1054, 0.000000e+00 > %.193 = select i1 %1055, float %820, float %819 > %1056 = call float @llvm.fabs.f32(float %1054) > %1057 = fsub float %1056, %.193 > %1058 = fcmp une float %.193, 0.000000e+00 > br i1 %1058, label %IF181, label %ELSE182 > >IF181: ; preds = %ENDIF174 > %1059 = fdiv float 1.000000e+00, %.193 > %1060 = fmul float %1057, %1059 > br label %ENDIF180 > >ELSE182: ; preds = %ENDIF174 > %1061 = fcmp ogt float %1057, 0.000000e+00 > %1062 = select i1 %1061, float 1.000000e+00, float %1057 > %1063 = fcmp oge float %1062, 0.000000e+00 > %.op228 = fmul float %1062, 0x4600000000000000 > %1064 = select i1 %1063, float %.op228, float 0xC600000000000000 > br label %ENDIF180 > >ENDIF180: ; preds = %ELSE182, %IF181 > %temp8.1 = phi float [ %1060, %IF181 ], [ %1064, %ELSE182 ] > %1065 = call float @llvm.AMDGPU.clamp.(float %temp8.1, float 0.000000e+00, float 1.000000e+00) > %1066 = fadd float %1065, %1012 > %1067 = fmul float %1066, 0x3FC5555560000000 > %1068 = fmul float %1067, %1067 > %1069 = call float @llvm.minnum.f32(float %1068, float %662) > %1070 = bitcast float %5 to i32 > %1071 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %1070, 10 > %1072 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1071, float %1069, 11 > %1073 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1072, float %1068, 12 > %1074 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1073, float 0.000000e+00, 13 > %1075 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1074, float 1.000000e+00, 14 > %1076 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1075, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1076 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { 
"InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SAMP[6] >DCL SAMP[7] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], SHADOW2D_ARRAY, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], CUBE, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 2D, FLOAT >DCL SVIEW[6], 2D, FLOAT >DCL SVIEW[7], 2D, FLOAT >DCL CONST[1][0..6] >DCL CONST[2][0..23] >DCL TEMP[0..17], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.0000, 1.0000} >IMM[1] UINT32 {1, 272, 0, 64} >IMM[2] UINT32 {80, 96, 336, 368} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 0.5000, -0.5000, 0.2500, 2.0000} >IMM[5] UINT32 {256, 16, 32, 48} >IMM[6] FLT32 { -1.0000, 4096.0000, 0.0040, 2.0040} >IMM[7] UINT32 {288, 0, 0, 0} >IMM[8] FLT32 { 0.1250, 0.5098, 0.1500, 0.2500} >IMM[9] FLT32 { 0.2500, 1.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[1].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[1].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D > 14: MOV TEMP[3].z, TEMP[2].xxxx > 15: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xxxx > 16: ADD TEMP[3].xyz, -TEMP[3].xyzz, CONST[2][17].xyzz > 17: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[2][0].xxxx > 18: DP3 TEMP[2].x, CONST[1][4].xyzz, TEMP[3].xyzz > 19: DP3 TEMP[4].x, CONST[1][5].xyzz, TEMP[3].xyzz > 20: MOV TEMP[2].y, TEMP[4].xxxx > 21: DP3 TEMP[4].x, CONST[1][6].xyzz, TEMP[3].xyzz > 22: MOV TEMP[2].z, TEMP[4].xxxx > 23: DP3 TEMP[5].x, -TEMP[2].xyzz, -TEMP[2].xyzz > 24: SQRT TEMP[5].x, TEMP[5].xxxx > 25: FSNE TEMP[6].x, CONST[2][21].xxxx, IMM[0].xxxx > 26: UIF TEMP[6].xxxx :0 > 27: RCP TEMP[6].x, CONST[2][21].xxxx > 28: MUL TEMP[6].x, TEMP[5].xxxx, TEMP[6].xxxx > 29: ELSE :0 > 30: SSG TEMP[7].x, TEMP[5].xxxx > 31: MUL TEMP[6].x, IMM[0].yyyy, TEMP[7].xxxx > 32: ENDIF > 33: FSEQ TEMP[7].xyz, TEMP[5].xxxx, IMM[0].xxxx > 34: SSG TEMP[8].xyz, -TEMP[2].xyzz > 35: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 36: RCP TEMP[5].xyz, TEMP[5].xxxx > 37: MUL TEMP[5].xyz, -TEMP[2].xyzz, TEMP[5].xyzz > 38: UCMP TEMP[5].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[5].xyzz > 39: FSGE TEMP[4].x, IMM[0].zzzz, TEMP[4].xxxx > 40: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx > 41: INEG TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[7].x, TEMP[5].zzzz, IMM[0].wwww > 43: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 44: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[0].xxxx > 45: SSG TEMP[9].xy, TEMP[5].xyyy > 46: MUL TEMP[9].xy, IMM[0].yyyy, TEMP[9].xyyy > 47: RCP TEMP[7].xy, TEMP[7].xxxx > 48: MUL TEMP[7].xy, TEMP[5].xyyy, TEMP[7].xyyy > 49: UCMP TEMP[7].xy, TEMP[8].xyyy, TEMP[9].xyyy, 
TEMP[7].xyyy > 50: ADD TEMP[2].xy, TEMP[7].xyyy, IMM[4].xxxx > 51: ADD TEMP[7].x, -TEMP[5].zzzz, IMM[0].wwww > 52: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 53: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[0].xxxx > 54: SSG TEMP[9].xy, TEMP[5].xyyy > 55: MUL TEMP[9].xy, IMM[0].yyyy, TEMP[9].xyyy > 56: RCP TEMP[7].xy, TEMP[7].xxxx > 57: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[7].xyyy > 58: UCMP TEMP[5].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[5].xyyy > 59: ADD TEMP[3].xy, TEMP[5].xyyy, IMM[4].xxxx > 60: ADD TEMP[5].x, -TEMP[2].yyyy, IMM[0].wwww > 61: MOV TEMP[2].z, TEMP[5].xxxx > 62: MOV TEMP[2].w, IMM[0].xxxx > 63: ADD TEMP[5].x, -TEMP[3].yyyy, IMM[0].wwww > 64: MOV TEMP[3].z, TEMP[5].xxxx > 65: MOV TEMP[3].w, IMM[0].wwww > 66: USNE TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 67: UIF TEMP[5].xxxx :0 > 68: MOV TEMP[5].x, TEMP[2].xxxx > 69: ELSE :0 > 70: MOV TEMP[5].x, TEMP[3].xxxx > 71: ENDIF > 72: MOV TEMP[5].x, TEMP[5].xxxx > 73: USNE TEMP[7].x, TEMP[4].xxxx, IMM[1].zzzz > 74: UIF TEMP[7].xxxx :0 > 75: MOV TEMP[7].x, TEMP[2].zzzz > 76: ELSE :0 > 77: MOV TEMP[7].x, TEMP[3].zzzz > 78: ENDIF > 79: MOV TEMP[5].y, TEMP[7].xxxx > 80: USNE TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz > 81: UIF TEMP[4].xxxx :0 > 82: MOV TEMP[4].x, TEMP[2].wwww > 83: ELSE :0 > 84: MOV TEMP[4].x, TEMP[3].wwww > 85: ENDIF > 86: MOV TEMP[5].z, TEMP[4].xxxx > 87: ADD TEMP[4].x, -TEMP[6].xxxx, IMM[0].wwww > 88: FMA TEMP[6], CONST[2][23].zwzw, IMM[4].yyxy, TEMP[5].xyxy > 89: MOV TEMP[7].xy, TEMP[6].xyxx > 90: MOV TEMP[7].z, TEMP[5].zzzz > 91: MOV TEMP[8].xyz, TEMP[7].xyzz > 92: MOV TEMP[8].w, TEMP[4].xxxx > 93: TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D_ARRAY > 94: MOV TEMP[7].xy, TEMP[6].zwzz > 95: MOV TEMP[6].xyz, TEMP[7].xyzz > 96: MOV TEMP[6].w, TEMP[4].xxxx > 97: TEX TEMP[6].x, TEMP[6], SAMP[1], SHADOW2D_ARRAY > 98: FMA TEMP[5], CONST[2][23].zwzw, IMM[4].yxxx, TEMP[5].xyxy > 99: MOV TEMP[7].xy, TEMP[5].xyxx >100: MOV TEMP[9].xyz, TEMP[7].xyzz >101: MOV TEMP[9].w, TEMP[4].xxxx >102: TEX TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D_ARRAY >103: MOV TEMP[7].xy, TEMP[5].zwzz >104: MOV TEMP[5].xyz, TEMP[7].xyzz >105: MOV TEMP[5].w, TEMP[4].xxxx >106: TEX TEMP[4].x, TEMP[5], SAMP[1], SHADOW2D_ARRAY >107: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[6].xxxx >108: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx >109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx >110: FMA TEMP[4].x, -TEMP[4].xxxx, IMM[4].zzzz, IMM[0].wwww >111: ADD TEMP[5].x, -CONST[2][16].wwww, IMM[0].wwww >112: FMA TEMP[4].x, CONST[2][16].wwww, TEMP[4].xxxx, TEMP[5].xxxx >113: DP3 TEMP[3].x, CONST[1][4].xyzz, TEMP[1].xyzz >114: DP3 TEMP[5].x, CONST[1][5].xyzz, TEMP[1].xyzz >115: MOV TEMP[3].y, TEMP[5].xxxx >116: DP3 TEMP[5].x, CONST[1][6].xyzz, TEMP[1].xyzz >117: MOV TEMP[3].z, TEMP[5].xxxx >118: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz >119: SQRT TEMP[6].x, TEMP[5].xxxx >120: MOV TEMP[2].y, TEMP[6].xxxx >121: MOV TEMP[2].x, IMM[0].xxxx >122: MOV TEMP[2].xy, TEMP[2].xyyy >123: MOV TEMP[2].w, IMM[0].xxxx >124: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D >125: DP3 TEMP[7].x, CONST[2][1].xyzz, TEMP[1].xyzz >126: DP3 TEMP[6].x, CONST[2][2].xyzz, TEMP[1].xyzz >127: MOV TEMP[7].y, TEMP[6].xxxx >128: DP3 TEMP[6].x, CONST[2][3].xyzz, TEMP[1].xyzz >129: MOV TEMP[7].z, TEMP[6].xxxx >130: MOV TEMP[6].xyz, TEMP[7].xyzz >131: MOV TEMP[6].w, IMM[0].xxxx >132: TXL TEMP[6], TEMP[6], SAMP[3], CUBE >133: MUL TEMP[1].xyz, TEMP[6].xyzz, CONST[2][16].xyzz >134: MOV TEMP[8].xy, TEMP[0].xyyy >135: TEX TEMP[8], TEMP[8], SAMP[4], 2D >136: MOV TEMP[7].xyz, TEMP[8] >137: MUL TEMP[9].xyz, TEMP[1].xyzz, TEMP[8].xyzz >138: DP3 TEMP[10].x, 
IN[2].xyzz, IN[2].xyzz >139: RSQ TEMP[10].x, TEMP[10].xxxx >140: MOV TEMP[11].xy, TEMP[0].xyyy >141: TEX TEMP[11], TEMP[11], SAMP[5], 2D >142: FMA TEMP[12].xyz, TEMP[11].xyzz, IMM[4].wwww, IMM[6].xxxx >143: DP3 TEMP[13].x, TEMP[12].xyzz, TEMP[12].xyzz >144: RSQ TEMP[13].x, TEMP[13].xxxx >145: MUL TEMP[12].xyz, TEMP[13].xxxx, TEMP[12].xyzz >146: RSQ TEMP[5].x, TEMP[5].xxxx >147: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[3].xyzz >148: MOV TEMP[5].xy, TEMP[0].xyyy >149: TEX TEMP[5], TEMP[5], SAMP[6], 2D >150: MUL TEMP[13].xyz, TEMP[6].wwww, CONST[2][18].xyzz >151: MUL TEMP[14].x, TEMP[5].xxxx, TEMP[5].xxxx >152: MOV TEMP[15].xy, TEMP[0].xyyy >153: MOV TEMP[15].w, IMM[0].xxxx >154: TXL TEMP[15].xy, TEMP[15], SAMP[7], 2D >155: FMA TEMP[10].xyz, IN[2].xyzz, TEMP[10].xxxx, TEMP[3].xyzz >156: DP3 TEMP[16].x, TEMP[10].xyzz, TEMP[10].xyzz >157: RSQ TEMP[16].x, TEMP[16].xxxx >158: MUL TEMP[10].xyz, TEMP[16].xxxx, TEMP[10].xyzz >159: DP3 TEMP[16].x, TEMP[12].xyzz, TEMP[10].xyzz >160: MOV_SAT TEMP[16].x, TEMP[16].xxxx >161: FMA TEMP[14].xy, TEMP[14].xxxx, IMM[6].yyyy, IMM[6].zwww >162: MUL TEMP[17].x, TEMP[14].yyyy, IMM[8].xxxx >163: LG2 TEMP[16].x, TEMP[16].xxxx >164: MUL TEMP[14].x, TEMP[16].xxxx, TEMP[14].xxxx >165: EX2 TEMP[14].x, TEMP[14].xxxx >166: MUL TEMP[14].x, TEMP[17].xxxx, TEMP[14].xxxx >167: ADD TEMP[16].x, -TEMP[5].zzzz, IMM[0].wwww >168: DP3 TEMP[10].x, TEMP[3].xyzz, TEMP[10].xyzz >169: MOV_SAT TEMP[10].x, TEMP[10].xxxx >170: ADD TEMP[10].x, -TEMP[10].xxxx, IMM[0].wwww >171: MUL TEMP[17].x, TEMP[10].xxxx, TEMP[10].xxxx >172: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[17].xxxx >173: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[17].xxxx >174: FMA TEMP[10].x, TEMP[16].xxxx, TEMP[10].xxxx, TEMP[5].zzzz >175: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[10].xxxx >176: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xxxx >177: MUL TEMP[6].xyz, TEMP[5].yyyy, TEMP[13].xyzz >178: DP3 TEMP[12].x, TEMP[12].xyzz, TEMP[3].xyzz >179: MOV TEMP[0].x, TEMP[12].xxxx >180: ADD TEMP[3].x, TEMP[8].wwww, TEMP[12].xxxx >181: ADD TEMP[3].x, TEMP[3].xxxx, IMM[6].xxxx >182: FSNE TEMP[13].x, TEMP[8].wwww, IMM[0].xxxx >183: UIF TEMP[13].xxxx :0 >184: RCP TEMP[13].x, TEMP[8].wwww >185: MUL TEMP[13].x, TEMP[3].xxxx, TEMP[13].xxxx >186: ELSE :0 >187: SSG TEMP[14].x, TEMP[3].xxxx >188: MUL TEMP[13].x, IMM[0].yyyy, TEMP[14].xxxx >189: ENDIF >190: FMA TEMP[6].xyz, TEMP[6].xyzz, TEMP[10].xxxx, TEMP[9].xyzz >191: MOV_SAT TEMP[10].x, TEMP[13].xxxx >192: MUL TEMP[3].xyz, TEMP[10].xxxx, TEMP[6].xyzz >193: FSLT TEMP[6].x, IMM[0].xxxx, TEMP[5].wwww >194: AND TEMP[6].x, TEMP[6].xxxx, IMM[3].xxxx >195: INEG TEMP[6].x, TEMP[6].xxxx >196: USNE TEMP[6].x, TEMP[6].xxxx, IMM[1].zzzz >197: UIF TEMP[6].xxxx :0 >198: FSLT TEMP[6].x, TEMP[11].wwww, IMM[8].yyyy >199: AND TEMP[6].x, TEMP[6].xxxx, IMM[3].xxxx >200: INEG TEMP[6].x, TEMP[6].xxxx >201: ADD TEMP[9].xyz, TEMP[9].xyzz, TEMP[9].xyzz >202: MAX TEMP[10].x, TEMP[8].zzzz, TEMP[8].yyyy >203: MAX TEMP[10].x, TEMP[10].xxxx, TEMP[8].xxxx >204: FSEQ TEMP[11].xyz, TEMP[10].xxxx, IMM[0].xxxx >205: SSG TEMP[13].xyz, TEMP[8].xyzz >206: MUL TEMP[13].xyz, IMM[0].yyyy, TEMP[13].xyzz >207: RCP TEMP[10].xyz, TEMP[10].xxxx >208: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[10].xyzz >209: UCMP TEMP[8].xyz, TEMP[11].xyzz, TEMP[13].xyzz, TEMP[8].xyzz >210: MOV_SAT TEMP[8].xyz, TEMP[8].xyzz >211: MUL TEMP[7].xyz, TEMP[8].xyzz, TEMP[8].xyzz >212: MOV_SAT TEMP[8].xyz, TEMP[1].xyzz >213: MUL TEMP[1].xyz, TEMP[8].xyzz, TEMP[7].xyzz >214: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >215: USNE TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >216: UIF TEMP[8].xxxx :0 
>217: MOV TEMP[8].x, TEMP[9].xxxx >218: ELSE :0 >219: MOV TEMP[8].x, TEMP[1].xxxx >220: ENDIF >221: MOV TEMP[8].x, TEMP[8].xxxx >222: USNE TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >223: UIF TEMP[10].xxxx :0 >224: MOV TEMP[10].x, TEMP[9].yyyy >225: ELSE :0 >226: MOV TEMP[10].x, TEMP[1].yyyy >227: ENDIF >228: MOV TEMP[8].y, TEMP[10].xxxx >229: USNE TEMP[6].x, TEMP[6].xxxx, IMM[1].zzzz >230: UIF TEMP[6].xxxx :0 >231: MOV TEMP[6].x, TEMP[9].zzzz >232: ELSE :0 >233: MOV TEMP[6].x, TEMP[1].zzzz >234: ENDIF >235: MOV TEMP[8].z, TEMP[6].xxxx >236: ADD TEMP[6].x, TEMP[5].wwww, IMM[4].yyyy >237: MOV_SAT TEMP[6].x, TEMP[6].xxxx >238: MUL TEMP[7].xyz, TEMP[6].xxxx, TEMP[8].xyzz >239: ADD TEMP[6].xy, -TEMP[12].xxxx, IMM[9].xyyy >240: MOV_SAT TEMP[6].xy, TEMP[6].xyyy >241: FMA TEMP[7].xyz, TEMP[7].xyzz, TEMP[6].xxxx, TEMP[3].xyzz >242: MIN TEMP[5].x, TEMP[5].wwww, IMM[4].xxxx >243: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[8].xyzz >244: MUL TEMP[1].xyz, TEMP[6].yyyy, TEMP[1].xyzz >245: ADD TEMP[5].x, TEMP[12].xxxx, IMM[4].zzzz >246: MOV_SAT TEMP[0].x, TEMP[5].xxxx >247: FMA TEMP[3].xyz, TEMP[1].xyzz, TEMP[0].xxxx, TEMP[7].xyzz >248: ENDIF >249: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[2].xyzz >250: MUL TEMP[0].xyz, TEMP[4].xxxx, TEMP[3].xyzz >251: MUL TEMP[0].xyz, TEMP[15].yyyy, TEMP[0].xyzz >252: MOV TEMP[0].w, IMM[0].wwww >253: MOV OUT[0], TEMP[0] >254: END >radeonsi: Compiling shader 88 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %46 = call float 
@llvm.SI.load.const(<16 x i8> %35, i32 256) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 260) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 264) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 268) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 272) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 276) > %52 = call float @llvm.SI.load.const(<16 x i8> %35, i32 280) > %53 = call float @llvm.SI.load.const(<16 x i8> %35, i32 288) > %54 = call float @llvm.SI.load.const(<16 x i8> %35, i32 292) > %55 = call float @llvm.SI.load.const(<16 x i8> %35, i32 296) > %56 = call float @llvm.SI.load.const(<16 x i8> %35, i32 336) > %57 = call float @llvm.SI.load.const(<16 x i8> %35, i32 376) > %58 = call float @llvm.SI.load.const(<16 x i8> %35, i32 380) > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 3 > %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 > %70 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %71 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %70, i64 0, i64 7 > %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 > %73 = extractelement <8 x i32> %69, i32 7 > %74 = extractelement <4 x i32> %72, i32 0 > %75 = and i32 %74, %73 > %76 = insertelement <4 x i32> %72, i32 %75, i32 0 > %77 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %78 = load <8 x i32>, <8 x i32> addrspace(2)* %77, align 32, !tbaa !0 > %79 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %79, i64 0, i64 11 > %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 > %82 = extractelement <8 x i32> %78, i32 7 > %83 = extractelement <4 x i32> %81, i32 0 > %84 = and i32 %83, %82 > %85 = insertelement <4 x i32> %81, i32 %84, i32 0 > %86 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 > %88 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %89 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %88, i64 0, i64 15 > %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 > %91 = extractelement <8 x i32> %87, i32 7 > %92 = extractelement <4 x i32> %90, i32 0 > %93 = and i32 %92, %91 > %94 = insertelement <4 x i32> %90, i32 %93, i32 0 > %95 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 > %97 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %98 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %97, i64 0, i64 19 > %99 = load <4 x i32>, <4 x i32> addrspace(2)* %98, align 16, !tbaa !0 > %100 = extractelement <8 x i32> %96, i32 7 > %101 = extractelement <4 x i32> 
%99, i32 0 > %102 = and i32 %101, %100 > %103 = insertelement <4 x i32> %99, i32 %102, i32 0 > %104 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %105 = load <8 x i32>, <8 x i32> addrspace(2)* %104, align 32, !tbaa !0 > %106 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %107 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %106, i64 0, i64 23 > %108 = load <4 x i32>, <4 x i32> addrspace(2)* %107, align 16, !tbaa !0 > %109 = extractelement <8 x i32> %105, i32 7 > %110 = extractelement <4 x i32> %108, i32 0 > %111 = and i32 %110, %109 > %112 = insertelement <4 x i32> %108, i32 %111, i32 0 > %113 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12 > %114 = load <8 x i32>, <8 x i32> addrspace(2)* %113, align 32, !tbaa !0 > %115 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %116 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %115, i64 0, i64 27 > %117 = load <4 x i32>, <4 x i32> addrspace(2)* %116, align 16, !tbaa !0 > %118 = extractelement <8 x i32> %114, i32 7 > %119 = extractelement <4 x i32> %117, i32 0 > %120 = and i32 %119, %118 > %121 = insertelement <4 x i32> %117, i32 %120, i32 0 > %122 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 14 > %123 = load <8 x i32>, <8 x i32> addrspace(2)* %122, align 32, !tbaa !0 > %124 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %125 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %124, i64 0, i64 31 > %126 = load <4 x i32>, <4 x i32> addrspace(2)* %125, align 16, !tbaa !0 > %127 = extractelement <8 x i32> %123, i32 7 > %128 = extractelement <4 x i32> %126, i32 0 > %129 = and i32 %128, %127 > %130 = insertelement <4 x i32> %126, i32 %129, i32 0 > %131 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %132 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %133 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %134 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %135 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %136 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %137 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %138 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %139 = fcmp oeq float %133, 0.000000e+00 > %140 = fcmp oeq float %133, 0.000000e+00 > %141 = fcmp ogt float %131, 0.000000e+00 > %142 = select i1 %141, float 1.000000e+00, float %131 > %143 = fcmp oge float %142, 0.000000e+00 > %144 = fcmp ogt float %132, 0.000000e+00 > %145 = select i1 %144, float 1.000000e+00, float %132 > %146 = fcmp oge float %145, 0.000000e+00 > %.op = fmul float %142, 0x4600000000000000 > %147 = select i1 %143, float %.op, float 0xC600000000000000 > %.op99 = fmul float %145, 0x4600000000000000 > %148 = select i1 %146, float %.op99, float 0xC600000000000000 > %149 = fdiv float 1.000000e+00, %133 > %150 = fmul float %131, %149 > %151 = fmul float %132, %149 > %152 = select i1 %139, float %147, float %150 > %153 = select i1 %140, float %148, float %151 > %154 = fcmp oeq float %133, 0.000000e+00 > %155 = fcmp oeq float %133, 0.000000e+00 > %156 = fcmp ogt float %134, 0.000000e+00 > %157 = select i1 %156, float 1.000000e+00, float %134 > %158 = fcmp oge float %157, 0.000000e+00 > %159 = fcmp ogt float %135, 0.000000e+00 > %160 = select i1 %159, float 1.000000e+00, 
float %135 > %161 = fcmp oge float %160, 0.000000e+00 > %.op100 = fmul float %157, 0x4600000000000000 > %162 = select i1 %158, float %.op100, float 0xC600000000000000 > %.op101 = fmul float %160, 0x4600000000000000 > %163 = select i1 %161, float %.op101, float 0xC600000000000000 > %164 = fdiv float 1.000000e+00, %133 > %165 = fmul float %134, %164 > %166 = fmul float %135, %164 > %167 = select i1 %154, float %162, float %165 > %168 = select i1 %155, float %163, float %166 > %169 = bitcast float %152 to i32 > %170 = bitcast float %153 to i32 > %171 = insertelement <2 x i32> undef, i32 %169, i32 0 > %172 = insertelement <2 x i32> %171, i32 %170, i32 1 > %173 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %172, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %174 = extractelement <4 x float> %173, i32 0 > %175 = fmul float %167, %174 > %176 = fmul float %168, %174 > %177 = fsub float %50, %175 > %178 = fsub float %51, %176 > %179 = fsub float %52, %174 > %180 = fmul float %177, %36 > %181 = fmul float %178, %36 > %182 = fmul float %179, %36 > %183 = fmul float %25, %177 > %184 = fmul float %26, %178 > %185 = fadd float %184, %183 > %186 = fmul float %27, %179 > %187 = fadd float %185, %186 > %188 = fmul float %28, %177 > %189 = fmul float %29, %178 > %190 = fadd float %189, %188 > %191 = fmul float %30, %179 > %192 = fadd float %190, %191 > %193 = fmul float %31, %177 > %194 = fmul float %32, %178 > %195 = fadd float %194, %193 > %196 = fmul float %33, %179 > %197 = fadd float %195, %196 > %198 = fmul float %187, %187 > %199 = fmul float %192, %192 > %200 = fadd float %199, %198 > %201 = fmul float %197, %197 > %202 = fadd float %200, %201 > %203 = call float @llvm.sqrt.f32(float %202) > %204 = fcmp une float %56, 0.000000e+00 > br i1 %204, label %IF, label %ELSE > >IF: ; preds = %main_body > %205 = fdiv float 1.000000e+00, %56 > %206 = fmul float %203, %205 > br label %ENDIF > >ELSE: ; preds = %main_body > %207 = fcmp ogt float %203, 0.000000e+00 > %208 = select i1 %207, float 1.000000e+00, float %203 > %209 = fcmp oge float %208, 0.000000e+00 > %.op102 = fmul float %208, 0x4600000000000000 > %210 = select i1 %209, float %.op102, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp24.0 = phi float [ %206, %IF ], [ %210, %ELSE ] > %211 = fcmp oeq float %203, 0.000000e+00 > %212 = fcmp oeq float %203, 0.000000e+00 > %213 = fcmp oeq float %203, 0.000000e+00 > %214 = fsub float -0.000000e+00, %187 > %215 = fcmp olt float %187, -0.000000e+00 > %216 = select i1 %215, float 1.000000e+00, float %214 > %217 = fcmp oge float %216, 0.000000e+00 > %218 = fsub float -0.000000e+00, %192 > %219 = fcmp olt float %192, -0.000000e+00 > %220 = select i1 %219, float 1.000000e+00, float %218 > %221 = fcmp oge float %220, 0.000000e+00 > %222 = fsub float -0.000000e+00, %197 > %223 = fcmp olt float %197, -0.000000e+00 > %224 = select i1 %223, float 1.000000e+00, float %222 > %225 = fcmp oge float %224, 0.000000e+00 > %.op103 = fmul float %216, 0x4600000000000000 > %226 = select i1 %217, float %.op103, float 0xC600000000000000 > %.op104 = fmul float %220, 0x4600000000000000 > %227 = select i1 %221, float %.op104, float 0xC600000000000000 > %.op105 = fmul float %224, 0x4600000000000000 > %228 = select i1 %225, float %.op105, float 0xC600000000000000 > %229 = fdiv float 1.000000e+00, %203 > %230 = fmul float %187, %229 > %231 = fsub float -0.000000e+00, %230 > %232 = fmul float %192, %229 > %233 = fsub float -0.000000e+00, %232 > %234 = 
fmul float %197, %229 > %235 = fsub float -0.000000e+00, %234 > %236 = select i1 %211, float %226, float %231 > %237 = select i1 %212, float %227, float %233 > %238 = select i1 %213, float %228, float %235 > %239 = fcmp ole float %197, -0.000000e+00 > %240 = fadd float %238, 1.000000e+00 > %241 = fadd float %240, %240 > %242 = fcmp oeq float %241, 0.000000e+00 > %243 = fcmp oeq float %241, 0.000000e+00 > %244 = fcmp ogt float %236, 0.000000e+00 > %245 = select i1 %244, float 1.000000e+00, float %236 > %246 = fcmp oge float %245, 0.000000e+00 > %247 = fcmp ogt float %237, 0.000000e+00 > %248 = select i1 %247, float 1.000000e+00, float %237 > %249 = fcmp oge float %248, 0.000000e+00 > %.op106 = fmul float %245, 0x4600000000000000 > %250 = select i1 %246, float %.op106, float 0xC600000000000000 > %.op107 = fmul float %248, 0x4600000000000000 > %251 = select i1 %249, float %.op107, float 0xC600000000000000 > %252 = fdiv float 1.000000e+00, %241 > %253 = fmul float %236, %252 > %254 = fmul float %237, %252 > %255 = select i1 %242, float %250, float %253 > %256 = select i1 %243, float %251, float %254 > %257 = fsub float 1.000000e+00, %238 > %258 = fadd float %257, %257 > %259 = fcmp oeq float %258, 0.000000e+00 > %260 = fcmp oeq float %258, 0.000000e+00 > %261 = fcmp ogt float %236, 0.000000e+00 > %262 = select i1 %261, float 1.000000e+00, float %236 > %263 = fcmp oge float %262, 0.000000e+00 > %264 = fcmp ogt float %237, 0.000000e+00 > %265 = select i1 %264, float 1.000000e+00, float %237 > %266 = fcmp oge float %265, 0.000000e+00 > %.op108 = fmul float %262, 0x4600000000000000 > %267 = select i1 %263, float %.op108, float 0xC600000000000000 > %.op109 = fmul float %265, 0x4600000000000000 > %268 = select i1 %266, float %.op109, float 0xC600000000000000 > %269 = fdiv float 1.000000e+00, %258 > %270 = fmul float %236, %269 > %271 = fmul float %237, %269 > %272 = select i1 %259, float %267, float %270 > %273 = select i1 %260, float %268, float %271 > %..v = select i1 %239, float %255, float %272 > %. = fadd float %..v, 5.000000e-01 > %temp28.0.v.v = select i1 %239, float %256, float %273 > %temp28.0.v = fadd float %temp28.0.v.v, 5.000000e-01 > %temp28.0 = fsub float 1.000000e+00, %temp28.0.v > %.96 = select i1 %239, float 0.000000e+00, float 1.000000e+00 > %274 = fsub float 1.000000e+00, %temp24.0 > %275 = call float @llvm.fma.f32(float %57, float -5.000000e-01, float %.) > %276 = call float @llvm.fma.f32(float %58, float -5.000000e-01, float %temp28.0) > %277 = call float @llvm.fma.f32(float %57, float 5.000000e-01, float %.) 
> %278 = call float @llvm.fma.f32(float %58, float -5.000000e-01, float %temp28.0) > %279 = bitcast float %274 to i32 > %280 = bitcast float %275 to i32 > %281 = bitcast float %276 to i32 > %282 = bitcast float %.96 to i32 > %283 = insertelement <4 x i32> undef, i32 %279, i32 0 > %284 = insertelement <4 x i32> %283, i32 %280, i32 1 > %285 = insertelement <4 x i32> %284, i32 %281, i32 2 > %286 = insertelement <4 x i32> %285, i32 %282, i32 3 > %287 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %286, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %288 = extractelement <4 x float> %287, i32 0 > %289 = bitcast float %274 to i32 > %290 = bitcast float %277 to i32 > %291 = bitcast float %278 to i32 > %292 = bitcast float %.96 to i32 > %293 = insertelement <4 x i32> undef, i32 %289, i32 0 > %294 = insertelement <4 x i32> %293, i32 %290, i32 1 > %295 = insertelement <4 x i32> %294, i32 %291, i32 2 > %296 = insertelement <4 x i32> %295, i32 %292, i32 3 > %297 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %296, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %298 = extractelement <4 x float> %297, i32 0 > %299 = call float @llvm.fma.f32(float %57, float -5.000000e-01, float %.) > %300 = call float @llvm.fma.f32(float %58, float 5.000000e-01, float %temp28.0) > %301 = call float @llvm.fma.f32(float %57, float 5.000000e-01, float %.) > %302 = call float @llvm.fma.f32(float %58, float 5.000000e-01, float %temp28.0) > %303 = bitcast float %274 to i32 > %304 = bitcast float %299 to i32 > %305 = bitcast float %300 to i32 > %306 = bitcast float %.96 to i32 > %307 = insertelement <4 x i32> undef, i32 %303, i32 0 > %308 = insertelement <4 x i32> %307, i32 %304, i32 1 > %309 = insertelement <4 x i32> %308, i32 %305, i32 2 > %310 = insertelement <4 x i32> %309, i32 %306, i32 3 > %311 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %310, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %312 = extractelement <4 x float> %311, i32 0 > %313 = bitcast float %274 to i32 > %314 = bitcast float %301 to i32 > %315 = bitcast float %302 to i32 > %316 = bitcast float %.96 to i32 > %317 = insertelement <4 x i32> undef, i32 %313, i32 0 > %318 = insertelement <4 x i32> %317, i32 %314, i32 1 > %319 = insertelement <4 x i32> %318, i32 %315, i32 2 > %320 = insertelement <4 x i32> %319, i32 %316, i32 3 > %321 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %320, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) > %322 = extractelement <4 x float> %321, i32 0 > %323 = fadd float %288, %298 > %324 = fadd float %312, %323 > %325 = fadd float %322, %324 > %326 = fsub float -0.000000e+00, %325 > %327 = call float @llvm.fma.f32(float %326, float 2.500000e-01, float 1.000000e+00) > %328 = fsub float 1.000000e+00, %49 > %329 = call float @llvm.fma.f32(float %49, float %327, float %328) > %330 = fmul float %25, %180 > %331 = fmul float %26, %181 > %332 = fadd float %331, %330 > %333 = fmul float %27, %182 > %334 = fadd float %332, %333 > %335 = fmul float %28, %180 > %336 = fmul float %29, %181 > %337 = fadd float %336, %335 > %338 = fmul float %30, %182 > %339 = fadd float %337, %338 > %340 = fmul float %31, %180 > %341 = fmul float %32, %181 > %342 = fadd float %341, %340 > %343 = fmul float %33, %182 > %344 = fadd float %342, %343 > %345 = fmul float %334, %334 > %346 = fmul float %339, %339 > %347 = fadd float %346, %345 > %348 = 
fmul float %344, %344 > %349 = fadd float %347, %348 > %350 = call float @llvm.sqrt.f32(float %349) > %351 = bitcast float %350 to i32 > %352 = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 %351, i32 1 > %353 = insertelement <4 x i32> %352, i32 0, i32 2 > %354 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %353, <8 x i32> %78, <4 x i32> %85, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %355 = extractelement <4 x float> %354, i32 0 > %356 = extractelement <4 x float> %354, i32 1 > %357 = extractelement <4 x float> %354, i32 2 > %358 = fmul float %37, %180 > %359 = fmul float %38, %181 > %360 = fadd float %359, %358 > %361 = fmul float %39, %182 > %362 = fadd float %360, %361 > %363 = fmul float %40, %180 > %364 = fmul float %41, %181 > %365 = fadd float %364, %363 > %366 = fmul float %42, %182 > %367 = fadd float %365, %366 > %368 = fmul float %43, %180 > %369 = fmul float %44, %181 > %370 = fadd float %369, %368 > %371 = fmul float %45, %182 > %372 = fadd float %370, %371 > %373 = insertelement <4 x float> undef, float %362, i32 0 > %374 = insertelement <4 x float> %373, float %367, i32 1 > %375 = insertelement <4 x float> %374, float %372, i32 2 > %376 = insertelement <4 x float> %375, float 0.000000e+00, i32 3 > %377 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %376) > %378 = extractelement <4 x float> %377, i32 0 > %379 = extractelement <4 x float> %377, i32 1 > %380 = extractelement <4 x float> %377, i32 2 > %381 = call float @llvm.fabs.f32(float %380) > %382 = fdiv float 1.000000e+00, %381 > %383 = fmul float %378, %382 > %384 = fadd float %383, 1.500000e+00 > %385 = fmul float %379, %382 > %386 = fadd float %385, 1.500000e+00 > %387 = bitcast float %386 to i32 > %388 = bitcast float %384 to i32 > %bc = bitcast <4 x float> %377 to <4 x i32> > %389 = extractelement <4 x i32> %bc, i32 3 > %390 = insertelement <4 x i32> undef, i32 %387, i32 0 > %391 = insertelement <4 x i32> %390, i32 %388, i32 1 > %392 = insertelement <4 x i32> %391, i32 %389, i32 2 > %393 = insertelement <4 x i32> %392, i32 0, i32 3 > %394 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %393, <8 x i32> %87, <4 x i32> %94, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %395 = extractelement <4 x float> %394, i32 0 > %396 = extractelement <4 x float> %394, i32 1 > %397 = extractelement <4 x float> %394, i32 2 > %398 = extractelement <4 x float> %394, i32 3 > %399 = fmul float %395, %46 > %400 = fmul float %396, %47 > %401 = fmul float %397, %48 > %402 = bitcast float %152 to i32 > %403 = bitcast float %153 to i32 > %404 = insertelement <2 x i32> undef, i32 %402, i32 0 > %405 = insertelement <2 x i32> %404, i32 %403, i32 1 > %406 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %405, <8 x i32> %96, <4 x i32> %103, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %407 = extractelement <4 x float> %406, i32 0 > %408 = extractelement <4 x float> %406, i32 1 > %409 = extractelement <4 x float> %406, i32 2 > %410 = extractelement <4 x float> %406, i32 3 > %411 = fmul float %399, %407 > %412 = fmul float %400, %408 > %413 = fmul float %401, %409 > %414 = fmul float %136, %136 > %415 = fmul float %137, %137 > %416 = fadd float %415, %414 > %417 = fmul float %138, %138 > %418 = fadd float %416, %417 > %419 = call float @llvm.AMDGPU.rsq.clamped.f32(float %418) > %420 = bitcast float %152 to i32 > %421 = bitcast float %153 to i32 > %422 = insertelement <2 x i32> undef, i32 %420, i32 0 > %423 = insertelement <2 x i32> %422, i32 %421, i32 1 
> %424 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %423, <8 x i32> %105, <4 x i32> %112, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %425 = extractelement <4 x float> %424, i32 0 > %426 = extractelement <4 x float> %424, i32 1 > %427 = extractelement <4 x float> %424, i32 2 > %428 = extractelement <4 x float> %424, i32 3 > %429 = call float @llvm.fma.f32(float %425, float 2.000000e+00, float -1.000000e+00) > %430 = call float @llvm.fma.f32(float %426, float 2.000000e+00, float -1.000000e+00) > %431 = call float @llvm.fma.f32(float %427, float 2.000000e+00, float -1.000000e+00) > %432 = fmul float %429, %429 > %433 = fmul float %430, %430 > %434 = fadd float %433, %432 > %435 = fmul float %431, %431 > %436 = fadd float %434, %435 > %437 = call float @llvm.AMDGPU.rsq.clamped.f32(float %436) > %438 = fmul float %437, %429 > %439 = fmul float %437, %430 > %440 = fmul float %437, %431 > %441 = call float @llvm.AMDGPU.rsq.clamped.f32(float %349) > %442 = fmul float %441, %334 > %443 = fmul float %441, %339 > %444 = fmul float %441, %344 > %445 = bitcast float %152 to i32 > %446 = bitcast float %153 to i32 > %447 = insertelement <2 x i32> undef, i32 %445, i32 0 > %448 = insertelement <2 x i32> %447, i32 %446, i32 1 > %449 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %448, <8 x i32> %114, <4 x i32> %121, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %450 = extractelement <4 x float> %449, i32 0 > %451 = extractelement <4 x float> %449, i32 1 > %452 = extractelement <4 x float> %449, i32 2 > %453 = extractelement <4 x float> %449, i32 3 > %454 = fmul float %398, %53 > %455 = fmul float %398, %54 > %456 = fmul float %398, %55 > %457 = fmul float %450, %450 > %458 = bitcast float %152 to i32 > %459 = bitcast float %153 to i32 > %460 = insertelement <4 x i32> undef, i32 %458, i32 0 > %461 = insertelement <4 x i32> %460, i32 %459, i32 1 > %462 = insertelement <4 x i32> %461, i32 0, i32 2 > %463 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %462, <8 x i32> %123, <4 x i32> %130, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %464 = extractelement <4 x float> %463, i32 0 > %465 = extractelement <4 x float> %463, i32 1 > %466 = call float @llvm.fma.f32(float %136, float %419, float %442) > %467 = call float @llvm.fma.f32(float %137, float %419, float %443) > %468 = call float @llvm.fma.f32(float %138, float %419, float %444) > %469 = fmul float %466, %466 > %470 = fmul float %467, %467 > %471 = fadd float %470, %469 > %472 = fmul float %468, %468 > %473 = fadd float %471, %472 > %474 = call float @llvm.AMDGPU.rsq.clamped.f32(float %473) > %475 = fmul float %474, %466 > %476 = fmul float %474, %467 > %477 = fmul float %474, %468 > %478 = fmul float %438, %475 > %479 = fmul float %439, %476 > %480 = fadd float %479, %478 > %481 = fmul float %440, %477 > %482 = fadd float %480, %481 > %483 = call float @llvm.AMDGPU.clamp.(float %482, float 0.000000e+00, float 1.000000e+00) > %484 = call float @llvm.fma.f32(float %457, float 4.096000e+03, float 0x3F70624DE0000000) > %485 = call float @llvm.fma.f32(float %457, float 4.096000e+03, float 0x4000083120000000) > %486 = fmul float %485, 1.250000e-01 > %487 = call float @llvm.log2.f32(float %483) > %488 = fmul float %487, %484 > %489 = call float @llvm.exp2.f32(float %488) > %490 = fmul float %486, %489 > %491 = fsub float 1.000000e+00, %452 > %492 = fmul float %442, %475 > %493 = fmul float %443, %476 > %494 = fadd float %493, %492 > %495 = fmul float %444, %477 > %496 = fadd float %494, 
%495 > %497 = call float @llvm.AMDGPU.clamp.(float %496, float 0.000000e+00, float 1.000000e+00) > %498 = fsub float 1.000000e+00, %497 > %499 = fmul float %498, %498 > %500 = fmul float %499, %499 > %501 = fmul float %498, %500 > %502 = call float @llvm.fma.f32(float %491, float %501, float %452) > %503 = fmul float %490, %502 > %504 = fmul float %454, %464 > %505 = fmul float %455, %464 > %506 = fmul float %456, %464 > %507 = fmul float %451, %504 > %508 = fmul float %451, %505 > %509 = fmul float %451, %506 > %510 = fmul float %438, %442 > %511 = fmul float %439, %443 > %512 = fadd float %511, %510 > %513 = fmul float %440, %444 > %514 = fadd float %512, %513 > %515 = fadd float %410, %514 > %516 = fadd float %515, -1.000000e+00 > %517 = fcmp une float %410, 0.000000e+00 > br i1 %517, label %IF82, label %ELSE83 > >IF82: ; preds = %ENDIF > %518 = fdiv float 1.000000e+00, %410 > %519 = fmul float %516, %518 > br label %ENDIF81 > >ELSE83: ; preds = %ENDIF > %520 = fcmp ogt float %516, 0.000000e+00 > %521 = select i1 %520, float 1.000000e+00, float %516 > %522 = fcmp oge float %521, 0.000000e+00 > %.op110 = fmul float %521, 0x4600000000000000 > %523 = select i1 %522, float %.op110, float 0xC600000000000000 > br label %ENDIF81 > >ENDIF81: ; preds = %ELSE83, %IF82 > %temp52.0 = phi float [ %519, %IF82 ], [ %523, %ELSE83 ] > %524 = call float @llvm.fma.f32(float %507, float %503, float %411) > %525 = call float @llvm.fma.f32(float %508, float %503, float %412) > %526 = call float @llvm.fma.f32(float %509, float %503, float %413) > %527 = call float @llvm.AMDGPU.clamp.(float %temp52.0, float 0.000000e+00, float 1.000000e+00) > %528 = fmul float %527, %524 > %529 = fmul float %527, %525 > %530 = fmul float %527, %526 > %531 = fcmp ogt float %453, 0.000000e+00 > br i1 %531, label %IF85, label %ENDIF84 > >IF85: ; preds = %ENDIF81 > %532 = fcmp olt float %428, 0x3FE0505060000000 > %533 = fadd float %411, %411 > %534 = fadd float %412, %412 > %535 = fadd float %413, %413 > %536 = call float @llvm.maxnum.f32(float %409, float %408) > %537 = call float @llvm.maxnum.f32(float %536, float %407) > %538 = fcmp oeq float %537, 0.000000e+00 > %539 = fcmp oeq float %537, 0.000000e+00 > %540 = fcmp oeq float %537, 0.000000e+00 > %541 = fcmp ogt float %407, 0.000000e+00 > %542 = select i1 %541, float 1.000000e+00, float %407 > %543 = fcmp oge float %542, 0.000000e+00 > %544 = fcmp ogt float %408, 0.000000e+00 > %545 = select i1 %544, float 1.000000e+00, float %408 > %546 = fcmp oge float %545, 0.000000e+00 > %547 = fcmp ogt float %409, 0.000000e+00 > %548 = select i1 %547, float 1.000000e+00, float %409 > %549 = fcmp oge float %548, 0.000000e+00 > %.op111 = fmul float %542, 0x4600000000000000 > %550 = select i1 %543, float %.op111, float 0xC600000000000000 > %.op112 = fmul float %545, 0x4600000000000000 > %551 = select i1 %546, float %.op112, float 0xC600000000000000 > %.op113 = fmul float %548, 0x4600000000000000 > %552 = select i1 %549, float %.op113, float 0xC600000000000000 > %553 = fdiv float 1.000000e+00, %537 > %554 = fmul float %407, %553 > %555 = fmul float %408, %553 > %556 = fmul float %409, %553 > %557 = select i1 %538, float %550, float %554 > %558 = select i1 %539, float %551, float %555 > %559 = select i1 %540, float %552, float %556 > %560 = call float @llvm.AMDGPU.clamp.(float %557, float 0.000000e+00, float 1.000000e+00) > %561 = call float @llvm.AMDGPU.clamp.(float %558, float 0.000000e+00, float 1.000000e+00) > %562 = call float @llvm.AMDGPU.clamp.(float %559, float 0.000000e+00, float 
1.000000e+00) > %563 = fmul float %560, %560 > %564 = fmul float %561, %561 > %565 = fmul float %562, %562 > %566 = call float @llvm.AMDGPU.clamp.(float %399, float 0.000000e+00, float 1.000000e+00) > %567 = call float @llvm.AMDGPU.clamp.(float %400, float 0.000000e+00, float 1.000000e+00) > %568 = call float @llvm.AMDGPU.clamp.(float %401, float 0.000000e+00, float 1.000000e+00) > %569 = fmul float %566, %563 > %570 = fmul float %567, %564 > %571 = fmul float %568, %565 > %572 = fmul float %569, 0x3FC3333340000000 > %573 = fmul float %570, 0x3FC3333340000000 > %574 = fmul float %571, 0x3FC3333340000000 > %.97 = select i1 %532, float %533, float %572 > %temp40.0 = select i1 %532, float %534, float %573 > %.98 = select i1 %532, float %535, float %574 > %575 = fadd float %453, -5.000000e-01 > %576 = call float @llvm.AMDGPU.clamp.(float %575, float 0.000000e+00, float 1.000000e+00) > %577 = fmul float %576, %.97 > %578 = fmul float %576, %temp40.0 > %579 = fmul float %576, %.98 > %580 = fsub float 2.500000e-01, %514 > %581 = fsub float 1.000000e+00, %514 > %582 = call float @llvm.AMDGPU.clamp.(float %580, float 0.000000e+00, float 1.000000e+00) > %583 = call float @llvm.AMDGPU.clamp.(float %581, float 0.000000e+00, float 1.000000e+00) > %584 = call float @llvm.fma.f32(float %577, float %582, float %528) > %585 = call float @llvm.fma.f32(float %578, float %582, float %529) > %586 = call float @llvm.fma.f32(float %579, float %582, float %530) > %587 = call float @llvm.minnum.f32(float %453, float 5.000000e-01) > %588 = fmul float %587, %.97 > %589 = fmul float %587, %temp40.0 > %590 = fmul float %587, %.98 > %591 = fmul float %583, %588 > %592 = fmul float %583, %589 > %593 = fmul float %583, %590 > %594 = fadd float %514, 2.500000e-01 > %595 = call float @llvm.AMDGPU.clamp.(float %594, float 0.000000e+00, float 1.000000e+00) > %596 = call float @llvm.fma.f32(float %591, float %595, float %584) > %597 = call float @llvm.fma.f32(float %592, float %595, float %585) > %598 = call float @llvm.fma.f32(float %593, float %595, float %586) > br label %ENDIF84 > >ENDIF84: ; preds = %ENDIF81, %IF85 > %temp12.0 = phi float [ %596, %IF85 ], [ %528, %ENDIF81 ] > %temp13.0 = phi float [ %597, %IF85 ], [ %529, %ENDIF81 ] > %temp14.0 = phi float [ %598, %IF85 ], [ %530, %ENDIF81 ] > %599 = fmul float %temp12.0, %355 > %600 = fmul float %temp13.0, %356 > %601 = fmul float %temp14.0, %357 > %602 = fmul float %329, %599 > %603 = fmul float %329, %600 > %604 = fmul float %329, %601 > %605 = fmul float %465, %602 > %606 = fmul float %465, %603 > %607 = fmul float %465, %604 > %608 = bitcast float %5 to i32 > %609 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %608, 10 > %610 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %609, float %605, 11 > %611 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %610, float %606, 12 > %612 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %611, float %607, 13 > %613 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, 
float, float, float, float, float, float, float }> %612, float 1.000000e+00, 14 > %614 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %613, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %614 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..14] >DCL TEMP[0..42], LOCAL >IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.1000} >IMM[1] UINT32 {0, 176, 160, 192} >IMM[2] FLT32 { -0.2700, 0.0100, 0.0040, 0.0000} >IMM[3] FLT32 { 0.5000, 0.2500, 1.3000, 0.6500} >IMM[4] UINT32 {224, 96, 112, 144} >IMM[5] FLT32 {158456325028528675187087900672.0000, 0.5000, -0.5000, -0.3800} >IMM[6] INT32 {1, 0, 0, 0} >IMM[7] FLT32 { 0.3100, -0.3100, -0.0100, -0.1000} >IMM[8] FLT32 { 2.3000, 1.1500, 3.2000, 1.6000} >IMM[9] FLT32 { -1.0000, 1.0000, 0.3100, -0.3100} >IMM[10] FLT32 { 0.0500, -0.3800, -0.4000, 0.3500} >IMM[11] FLT32 { 4.1000, 2.0500, 5.3000, 2.6500} >IMM[12] FLT32 { 6.1000, 3.0500, 7.7000, 3.8500} >IMM[13] FLT32 { 8.5000, 4.2500, 10.0000, 5.0000} >IMM[14] FLT32 { 0.7100, 0.3400, 0.1000, -0.1500} >IMM[15] FLT32 { 0.2700, -0.0100, 0.0100, 0.3100} >IMM[16] FLT32 { 
-0.0500, 0.3800, 0.0625, 0.0000} >IMM[17] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].w, IMM[0].xxxx > 1: DP3 TEMP[1].x, IN[1].xyzz, IN[1].xyzz > 2: RSQ TEMP[2].x, TEMP[1].xxxx > 3: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[1].xyzz > 4: MOV TEMP[2].xy, IN[0].xyyy > 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D > 6: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[0].yyyy, IMM[0].zzzz > 7: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz > 8: RSQ TEMP[4].x, TEMP[4].xxxx > 9: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 10: MUL TEMP[3].xyz, TEMP[2].yyyy, CONST[1][11].xyzz > 11: FMA TEMP[4].xyz, TEMP[2].xxxx, CONST[1][10].xyzz, TEMP[3].xyzz > 12: FMA TEMP[4].xyz, TEMP[2].zzzz, CONST[1][12].xyzz, TEMP[4].xyzz > 13: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 14: RSQ TEMP[5].x, TEMP[5].xxxx > 15: MUL TEMP[2].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 16: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[2].xyzz > 17: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx > 18: FMA TEMP[1].xyz, TEMP[2].xyzz, -TEMP[4].xxxx, TEMP[1].xyzz > 19: FMA TEMP[4].xyz, TEMP[1].xyzz, IMM[0].yyyy, TEMP[2].xyzz > 20: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 21: RSQ TEMP[5].x, TEMP[5].xxxx > 22: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 23: MUL TEMP[3].xyz, TEMP[2].yzxx, TEMP[1].zxyy > 24: FMA TEMP[4].xyz, TEMP[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz > 25: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 26: RSQ TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 28: MOV TEMP[4].xy, IN[0].zwww > 29: TEX TEMP[4].xy, TEMP[4], SAMP[1], 2D > 30: FMA TEMP[5].xy, TEMP[4].xyyy, IMM[0].yyyy, IMM[0].zzzz > 31: DP2 TEMP[6].x, TEMP[5].xyyy, TEMP[5].xyyy > 32: RSQ TEMP[6].x, TEMP[6].xxxx > 33: MUL TEMP[4].xy, TEMP[6].xxxx, TEMP[5].xyyy > 34: MOV TEMP[4].zw, -TEMP[4].xxxx > 35: DP2 TEMP[5].x, TEMP[4].zyyy, IMM[0].wwww > 36: DP2 TEMP[6].x, TEMP[4].wyyy, IMM[2].xyyy > 37: MUL TEMP[7].xyz, TEMP[1].yzxx, TEMP[3].zxyy > 38: FMA TEMP[7].xyz, TEMP[3].yzxx, TEMP[1].zxyy, -TEMP[7].xyzz > 39: FMA TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx, TEMP[1].xyzz > 40: DP2 TEMP[8].x, TEMP[4].yxxx, IMM[0].wwww > 41: FMA TEMP[5].xyz, TEMP[3].xyzz, TEMP[8].xxxx, TEMP[5].xyzz > 42: MOV TEMP[9].xy, IN[0].xyyy > 43: TEX TEMP[9].x, TEMP[9], SAMP[2], 2D > 44: MOV TEMP[10].z, TEMP[9].xxxx > 45: MUL TEMP[9].xy, TEMP[9].xxxx, IMM[2].zyyy > 46: MAX TEMP[9].xy, TEMP[9].xyyy, IMM[0].xxxx > 47: MUL TEMP[11], TEMP[9].xxxx, IMM[3] > 48: MOV TEMP[10].w, IMM[0].xxxx > 49: DP2 TEMP[12].x, TEMP[10].zwww, CONST[1][14].xyyy > 50: MUL TEMP[10].xy, TEMP[12].xxxx, IN[2].xyyy > 51: FMA TEMP[12].xyz, TEMP[5].xyzz, TEMP[11].xxxx, TEMP[10].xyzz > 52: MOV TEMP[0].xyz, TEMP[12].xyzx > 53: DP4 TEMP[5].x, CONST[1][6], TEMP[0] > 54: DP4 TEMP[13].x, CONST[1][7], TEMP[0] > 55: MOV TEMP[5].y, TEMP[13].xxxx > 56: DP4 TEMP[13].x, CONST[1][9], TEMP[0] > 57: FSEQ TEMP[14].xy, TEMP[13].xxxx, IMM[2].wwww > 58: SSG TEMP[15].xy, TEMP[5].xyyy > 59: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy > 60: RCP TEMP[13].xy, TEMP[13].xxxx > 61: MUL TEMP[13].xy, TEMP[5].xyyy, TEMP[13].xyyy > 62: UCMP TEMP[0].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[13].xyyy > 63: FMA TEMP[0].xy, TEMP[0].xyyy, IMM[5].yzzz, IMM[3].xxxx > 64: MOV TEMP[13].xy, TEMP[0].xyyy > 65: TEX TEMP[13].x, TEMP[13], SAMP[3], 2D > 66: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].zzzz > 67: FSLT TEMP[12].x, TEMP[0].xxxx, IMM[2].wwww > 68: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx > 69: INEG TEMP[12].x, TEMP[12].xxxx > 70: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx > 71: UIF TEMP[12].xxxx :0 > 72: MOV TEMP[12].x, TEMP[11].yyyy > 73: ELSE :0 > 74: MOV TEMP[12].x, TEMP[11].xxxx > 
75: ENDIF > 76: MUL TEMP[13].x, TEMP[12].xxxx, IMM[3].xxxx > 77: FSLT TEMP[14].x, IMM[2].wwww, TEMP[1].zzzz > 78: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx > 79: INEG TEMP[14].x, TEMP[14].xxxx > 80: FSLT TEMP[15].x, IMM[2].wwww, TEMP[0].xxxx > 81: AND TEMP[15].x, TEMP[15].xxxx, IMM[6].xxxx > 82: INEG TEMP[15].x, TEMP[15].xxxx > 83: AND TEMP[15].x, TEMP[14].xxxx, TEMP[15].xxxx > 84: USNE TEMP[15].x, TEMP[15].xxxx, IMM[1].xxxx > 85: UIF TEMP[15].xxxx :0 > 86: MOV TEMP[13].x, TEMP[13].xxxx > 87: ELSE :0 > 88: MOV TEMP[13].x, TEMP[12].xxxx > 89: ENDIF > 90: ABS TEMP[12].x, TEMP[0].xxxx > 91: ADD TEMP[0].x, -TEMP[13].xxxx, TEMP[12].xxxx > 92: FSNE TEMP[12].x, TEMP[13].xxxx, IMM[2].wwww > 93: UIF TEMP[12].xxxx :0 > 94: RCP TEMP[12].x, TEMP[13].xxxx > 95: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 96: ELSE :0 > 97: SSG TEMP[13].x, TEMP[0].xxxx > 98: MUL TEMP[12].x, IMM[5].xxxx, TEMP[13].xxxx > 99: ENDIF >100: MOV_SAT TEMP[12].x, TEMP[12].xxxx >101: MOV TEMP[5].w, IMM[0].xxxx >102: MUL TEMP[13], TEMP[4].yxyx, IMM[7].xyyx >103: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[13].yyyy, TEMP[1].xyzz >104: FMA TEMP[15].xyz, TEMP[3].xyzz, TEMP[13].xxxx, TEMP[15].xyzz >105: FMA TEMP[16].xyz, TEMP[15].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >106: MOV TEMP[5].xyz, TEMP[16].xyzx >107: DP4 TEMP[15].x, CONST[1][6], TEMP[5] >108: DP4 TEMP[17].x, CONST[1][7], TEMP[5] >109: MOV TEMP[15].y, TEMP[17].xxxx >110: DP4 TEMP[17].x, CONST[1][9], TEMP[5] >111: FSEQ TEMP[18].xy, TEMP[17].xxxx, IMM[2].wwww >112: SSG TEMP[19].xy, TEMP[15].xyyy >113: MUL TEMP[19].xy, IMM[5].xxxx, TEMP[19].xyyy >114: RCP TEMP[17].xy, TEMP[17].xxxx >115: MUL TEMP[17].xy, TEMP[15].xyyy, TEMP[17].xyyy >116: UCMP TEMP[17].xy, TEMP[18].xyyy, TEMP[19].xyyy, TEMP[17].xyyy >117: FMA TEMP[17].xy, TEMP[17].xyyy, IMM[5].yzzz, IMM[3].xxxx >118: MOV TEMP[17].xy, TEMP[17].xyyy >119: TEX TEMP[17].x, TEMP[17], SAMP[3], 2D >120: ADD TEMP[16].x, -TEMP[17].xxxx, TEMP[16].zzzz >121: FSLT TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >122: AND TEMP[17].x, TEMP[17].xxxx, IMM[6].xxxx >123: INEG TEMP[17].x, TEMP[17].xxxx >124: USNE TEMP[17].x, TEMP[17].xxxx, IMM[1].xxxx >125: UIF TEMP[17].xxxx :0 >126: MOV TEMP[17].x, TEMP[11].wwww >127: ELSE :0 >128: MOV TEMP[17].x, TEMP[11].zzzz >129: ENDIF >130: MUL TEMP[18].x, TEMP[17].xxxx, IMM[3].xxxx >131: FSLT TEMP[19].x, IMM[2].wwww, TEMP[16].xxxx >132: AND TEMP[19].x, TEMP[19].xxxx, IMM[6].xxxx >133: INEG TEMP[19].x, TEMP[19].xxxx >134: AND TEMP[19].x, TEMP[14].xxxx, TEMP[19].xxxx >135: USNE TEMP[19].x, TEMP[19].xxxx, IMM[1].xxxx >136: UIF TEMP[19].xxxx :0 >137: MOV TEMP[18].x, TEMP[18].xxxx >138: ELSE :0 >139: MOV TEMP[18].x, TEMP[17].xxxx >140: ENDIF >141: ABS TEMP[16].x, TEMP[16].xxxx >142: ADD TEMP[16].x, -TEMP[18].xxxx, TEMP[16].xxxx >143: FSNE TEMP[17].x, TEMP[18].xxxx, IMM[2].wwww >144: UIF TEMP[17].xxxx :0 >145: RCP TEMP[17].x, TEMP[18].xxxx >146: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[17].xxxx >147: ELSE :0 >148: SSG TEMP[16].x, TEMP[16].xxxx >149: MUL TEMP[17].x, IMM[5].xxxx, TEMP[16].xxxx >150: ENDIF >151: MOV_SAT TEMP[16].x, TEMP[17].xxxx >152: ADD TEMP[0].x, TEMP[16].xxxx, TEMP[12].xxxx >153: MOV TEMP[5].w, IMM[0].xxxx >154: DP2 TEMP[12].x, TEMP[4].yxxx, IMM[2].xyyy >155: FMA TEMP[15].xyz, TEMP[7].xyzz, TEMP[6].xxxx, TEMP[1].xyzz >156: FMA TEMP[15].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[15].xyzz >157: MUL TEMP[6], TEMP[9].xxxx, IMM[8] >158: FMA TEMP[16].xyz, TEMP[15].xyzz, TEMP[6].xxxx, TEMP[10].xyzz >159: MOV TEMP[5].xyz, TEMP[16].xyzx >160: DP4 TEMP[15].x, CONST[1][6], TEMP[5] >161: DP4 TEMP[17].x, CONST[1][7], TEMP[5] >162: MOV 
TEMP[15].y, TEMP[17].xxxx >163: DP4 TEMP[17].x, CONST[1][9], TEMP[5] >164: FSEQ TEMP[18].xy, TEMP[17].xxxx, IMM[2].wwww >165: SSG TEMP[19].xy, TEMP[15].xyyy >166: MUL TEMP[19].xy, IMM[5].xxxx, TEMP[19].xyyy >167: RCP TEMP[17].xy, TEMP[17].xxxx >168: MUL TEMP[17].xy, TEMP[15].xyyy, TEMP[17].xyyy >169: UCMP TEMP[5].xy, TEMP[18].xyyy, TEMP[19].xyyy, TEMP[17].xyyy >170: FMA TEMP[5].xy, TEMP[5].xyyy, IMM[5].yzzz, IMM[3].xxxx >171: MOV TEMP[17].xy, TEMP[5].xyyy >172: TEX TEMP[17].x, TEMP[17], SAMP[3], 2D >173: ADD TEMP[16].x, -TEMP[17].xxxx, TEMP[16].zzzz >174: FSLT TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >175: AND TEMP[17].x, TEMP[17].xxxx, IMM[6].xxxx >176: INEG TEMP[17].x, TEMP[17].xxxx >177: USNE TEMP[17].x, TEMP[17].xxxx, IMM[1].xxxx >178: UIF TEMP[17].xxxx :0 >179: MOV TEMP[17].x, TEMP[6].yyyy >180: ELSE :0 >181: MOV TEMP[17].x, TEMP[6].xxxx >182: ENDIF >183: MUL TEMP[18].x, TEMP[17].xxxx, IMM[3].xxxx >184: FSLT TEMP[19].x, IMM[2].wwww, TEMP[16].xxxx >185: AND TEMP[19].x, TEMP[19].xxxx, IMM[6].xxxx >186: INEG TEMP[19].x, TEMP[19].xxxx >187: AND TEMP[19].x, TEMP[14].xxxx, TEMP[19].xxxx >188: USNE TEMP[19].x, TEMP[19].xxxx, IMM[1].xxxx >189: UIF TEMP[19].xxxx :0 >190: MOV TEMP[18].x, TEMP[18].xxxx >191: ELSE :0 >192: MOV TEMP[18].x, TEMP[17].xxxx >193: ENDIF >194: ABS TEMP[16].x, TEMP[16].xxxx >195: ADD TEMP[16].x, TEMP[16].xxxx, -TEMP[18].xxxx >196: FSNE TEMP[17].x, TEMP[18].xxxx, IMM[2].wwww >197: UIF TEMP[17].xxxx :0 >198: RCP TEMP[17].x, TEMP[18].xxxx >199: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[17].xxxx >200: ELSE :0 >201: SSG TEMP[16].x, TEMP[16].xxxx >202: MUL TEMP[17].x, IMM[5].xxxx, TEMP[16].xxxx >203: ENDIF >204: MOV_SAT TEMP[16].x, TEMP[17].xxxx >205: ADD TEMP[0].x, TEMP[16].xxxx, TEMP[0].xxxx >206: MOV TEMP[5].w, IMM[0].xxxx >207: DP2 TEMP[16].x, TEMP[4].yxxx, IMM[7].zyyy >208: MUL TEMP[15], TEMP[4].xyyy, IMM[9] >209: DP2 TEMP[17].x, TEMP[15].xyyy, IMM[7].zyyy >210: FMA TEMP[18].xyz, TEMP[7].xyzz, TEMP[17].xxxx, TEMP[1].xyzz >211: FMA TEMP[18].xyz, TEMP[3].xyzz, TEMP[16].xxxx, TEMP[18].xyzz >212: FMA TEMP[19].xyz, TEMP[18].xyzz, TEMP[6].zzzz, TEMP[10].xyzz >213: MOV TEMP[5].xyz, TEMP[19].xyzx >214: DP4 TEMP[18].x, CONST[1][6], TEMP[5] >215: DP4 TEMP[20].x, CONST[1][7], TEMP[5] >216: MOV TEMP[18].y, TEMP[20].xxxx >217: DP4 TEMP[20].x, CONST[1][9], TEMP[5] >218: FSEQ TEMP[21].xy, TEMP[20].xxxx, IMM[2].wwww >219: SSG TEMP[22].xy, TEMP[18].xyyy >220: MUL TEMP[22].xy, IMM[5].xxxx, TEMP[22].xyyy >221: RCP TEMP[20].xy, TEMP[20].xxxx >222: MUL TEMP[20].xy, TEMP[18].xyyy, TEMP[20].xyyy >223: UCMP TEMP[5].xy, TEMP[21].xyyy, TEMP[22].xyyy, TEMP[20].xyyy >224: FMA TEMP[5].xy, TEMP[5].xyyy, IMM[5].yzzz, IMM[3].xxxx >225: MOV TEMP[20].xy, TEMP[5].xyyy >226: TEX TEMP[20].x, TEMP[20], SAMP[3], 2D >227: ADD TEMP[19].x, -TEMP[20].xxxx, TEMP[19].zzzz >228: FSLT TEMP[20].x, TEMP[19].xxxx, IMM[2].wwww >229: AND TEMP[20].x, TEMP[20].xxxx, IMM[6].xxxx >230: INEG TEMP[20].x, TEMP[20].xxxx >231: USNE TEMP[20].x, TEMP[20].xxxx, IMM[1].xxxx >232: UIF TEMP[20].xxxx :0 >233: MOV TEMP[20].x, TEMP[6].wwww >234: ELSE :0 >235: MOV TEMP[20].x, TEMP[6].zzzz >236: ENDIF >237: MUL TEMP[5].x, TEMP[20].xxxx, IMM[3].xxxx >238: FSLT TEMP[21].x, IMM[2].wwww, TEMP[19].xxxx >239: AND TEMP[21].x, TEMP[21].xxxx, IMM[6].xxxx >240: INEG TEMP[21].x, TEMP[21].xxxx >241: AND TEMP[21].x, TEMP[14].xxxx, TEMP[21].xxxx >242: USNE TEMP[21].x, TEMP[21].xxxx, IMM[1].xxxx >243: UIF TEMP[21].xxxx :0 >244: MOV TEMP[21].x, TEMP[5].xxxx >245: ELSE :0 >246: MOV TEMP[21].x, TEMP[20].xxxx >247: ENDIF >248: ABS TEMP[19].x, TEMP[19].xxxx >249: ADD 
TEMP[19].x, TEMP[19].xxxx, -TEMP[21].xxxx >250: FSNE TEMP[20].x, TEMP[21].xxxx, IMM[2].wwww >251: UIF TEMP[20].xxxx :0 >252: RCP TEMP[20].x, TEMP[21].xxxx >253: MUL TEMP[20].x, TEMP[19].xxxx, TEMP[20].xxxx >254: ELSE :0 >255: SSG TEMP[19].x, TEMP[19].xxxx >256: MUL TEMP[20].x, IMM[5].xxxx, TEMP[19].xxxx >257: ENDIF >258: MOV_SAT TEMP[19].x, TEMP[20].xxxx >259: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[19].xxxx >260: MOV TEMP[5].w, IMM[0].xxxx >261: DP2 TEMP[19].x, TEMP[4].yxxx, IMM[10].xyyy >262: DP2 TEMP[20].x, TEMP[15].xyyy, IMM[10].xyyy >263: FMA TEMP[18].xyz, TEMP[7].xyzz, TEMP[20].xxxx, TEMP[1].xyzz >264: FMA TEMP[18].xyz, TEMP[3].xyzz, TEMP[19].xxxx, TEMP[18].xyzz >265: MUL TEMP[21], TEMP[9].xxxx, IMM[11] >266: FMA TEMP[22].xyz, TEMP[18].xyzz, TEMP[21].xxxx, TEMP[10].xyzz >267: MOV TEMP[5].xyz, TEMP[22].xyzx >268: DP4 TEMP[18].x, CONST[1][6], TEMP[5] >269: DP4 TEMP[23].x, CONST[1][7], TEMP[5] >270: MOV TEMP[18].y, TEMP[23].xxxx >271: DP4 TEMP[23].x, CONST[1][9], TEMP[5] >272: FSEQ TEMP[24].xy, TEMP[23].xxxx, IMM[2].wwww >273: SSG TEMP[25].xy, TEMP[18].xyyy >274: MUL TEMP[25].xy, IMM[5].xxxx, TEMP[25].xyyy >275: RCP TEMP[23].xy, TEMP[23].xxxx >276: MUL TEMP[23].xy, TEMP[18].xyyy, TEMP[23].xyyy >277: UCMP TEMP[5].xy, TEMP[24].xyyy, TEMP[25].xyyy, TEMP[23].xyyy >278: FMA TEMP[5].xy, TEMP[5].xyyy, IMM[5].yzzz, IMM[3].xxxx >279: MOV TEMP[23].xy, TEMP[5].xyyy >280: TEX TEMP[23].x, TEMP[23], SAMP[3], 2D >281: ADD TEMP[5].x, -TEMP[23].xxxx, TEMP[22].zzzz >282: FSLT TEMP[22].x, TEMP[5].xxxx, IMM[2].wwww >283: AND TEMP[22].x, TEMP[22].xxxx, IMM[6].xxxx >284: INEG TEMP[22].x, TEMP[22].xxxx >285: USNE TEMP[22].x, TEMP[22].xxxx, IMM[1].xxxx >286: UIF TEMP[22].xxxx :0 >287: MOV TEMP[22].x, TEMP[21].yyyy >288: ELSE :0 >289: MOV TEMP[22].x, TEMP[21].xxxx >290: ENDIF >291: MUL TEMP[23].x, TEMP[22].xxxx, IMM[3].xxxx >292: FSLT TEMP[24].x, IMM[2].wwww, TEMP[5].xxxx >293: AND TEMP[24].x, TEMP[24].xxxx, IMM[6].xxxx >294: INEG TEMP[24].x, TEMP[24].xxxx >295: AND TEMP[24].x, TEMP[14].xxxx, TEMP[24].xxxx >296: USNE TEMP[24].x, TEMP[24].xxxx, IMM[1].xxxx >297: UIF TEMP[24].xxxx :0 >298: MOV TEMP[23].x, TEMP[23].xxxx >299: ELSE :0 >300: MOV TEMP[23].x, TEMP[22].xxxx >301: ENDIF >302: ABS TEMP[22].x, TEMP[5].xxxx >303: ADD TEMP[5].x, -TEMP[23].xxxx, TEMP[22].xxxx >304: FSNE TEMP[22].x, TEMP[23].xxxx, IMM[2].wwww >305: UIF TEMP[22].xxxx :0 >306: RCP TEMP[22].x, TEMP[23].xxxx >307: MUL TEMP[22].x, TEMP[5].xxxx, TEMP[22].xxxx >308: ELSE :0 >309: SSG TEMP[23].x, TEMP[5].xxxx >310: MUL TEMP[22].x, IMM[5].xxxx, TEMP[23].xxxx >311: ENDIF >312: MOV_SAT TEMP[22].x, TEMP[22].xxxx >313: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[22].xxxx >314: MOV TEMP[5].w, IMM[0].xxxx >315: DP2 TEMP[22].x, TEMP[4].yxxx, IMM[3].xxxx >316: DP2 TEMP[13].x, TEMP[15].xyyy, IMM[3].xxxx >317: FMA TEMP[18].xyz, TEMP[7].xyzz, TEMP[13].xxxx, TEMP[1].xyzz >318: FMA TEMP[18].xyz, TEMP[3].xyzz, TEMP[22].xxxx, TEMP[18].xyzz >319: FMA TEMP[23].xyz, TEMP[18].xyzz, TEMP[21].zzzz, TEMP[10].xyzz >320: MOV TEMP[5].xyz, TEMP[23].xyzx >321: DP4 TEMP[18].x, CONST[1][6], TEMP[5] >322: DP4 TEMP[24].x, CONST[1][7], TEMP[5] >323: MOV TEMP[18].y, TEMP[24].xxxx >324: DP4 TEMP[24].x, CONST[1][9], TEMP[5] >325: FSEQ TEMP[25].xy, TEMP[24].xxxx, IMM[2].wwww >326: SSG TEMP[26].xy, TEMP[18].xyyy >327: MUL TEMP[26].xy, IMM[5].xxxx, TEMP[26].xyyy >328: RCP TEMP[24].xy, TEMP[24].xxxx >329: MUL TEMP[24].xy, TEMP[18].xyyy, TEMP[24].xyyy >330: UCMP TEMP[5].xy, TEMP[25].xyyy, TEMP[26].xyyy, TEMP[24].xyyy >331: FMA TEMP[5].xy, TEMP[5].xyyy, IMM[5].yzzz, IMM[3].xxxx >332: MOV TEMP[24].xy, 
TEMP[5].xyyy >333: TEX TEMP[24].x, TEMP[24], SAMP[3], 2D >334: ADD TEMP[5].x, -TEMP[24].xxxx, TEMP[23].zzzz >335: FSLT TEMP[23].x, TEMP[5].xxxx, IMM[2].wwww >336: AND TEMP[23].x, TEMP[23].xxxx, IMM[6].xxxx >337: INEG TEMP[23].x, TEMP[23].xxxx >338: USNE TEMP[23].x, TEMP[23].xxxx, IMM[1].xxxx >339: UIF TEMP[23].xxxx :0 >340: MOV TEMP[23].x, TEMP[21].wwww >341: ELSE :0 >342: MOV TEMP[23].x, TEMP[21].zzzz >343: ENDIF >344: MUL TEMP[24].x, TEMP[23].xxxx, IMM[3].xxxx >345: FSLT TEMP[25].x, IMM[2].wwww, TEMP[5].xxxx >346: AND TEMP[25].x, TEMP[25].xxxx, IMM[6].xxxx >347: INEG TEMP[25].x, TEMP[25].xxxx >348: AND TEMP[25].x, TEMP[14].xxxx, TEMP[25].xxxx >349: USNE TEMP[25].x, TEMP[25].xxxx, IMM[1].xxxx >350: UIF TEMP[25].xxxx :0 >351: MOV TEMP[24].x, TEMP[24].xxxx >352: ELSE :0 >353: MOV TEMP[24].x, TEMP[23].xxxx >354: ENDIF >355: ABS TEMP[23].x, TEMP[5].xxxx >356: ADD TEMP[5].x, -TEMP[24].xxxx, TEMP[23].xxxx >357: FSNE TEMP[23].x, TEMP[24].xxxx, IMM[2].wwww >358: UIF TEMP[23].xxxx :0 >359: RCP TEMP[23].x, TEMP[24].xxxx >360: MUL TEMP[23].x, TEMP[5].xxxx, TEMP[23].xxxx >361: ELSE :0 >362: SSG TEMP[24].x, TEMP[5].xxxx >363: MUL TEMP[23].x, IMM[5].xxxx, TEMP[24].xxxx >364: ENDIF >365: MOV_SAT TEMP[23].x, TEMP[23].xxxx >366: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[23].xxxx >367: MUL TEMP[5], TEMP[9].xxxx, IMM[12] >368: MOV TEMP[18].w, IMM[0].xxxx >369: DP2 TEMP[23].x, TEMP[4].yxxx, IMM[10].zwww >370: DP2 TEMP[24].x, TEMP[15].xyyy, IMM[10].zwww >371: FMA TEMP[25].xyz, TEMP[7].xyzz, TEMP[24].xxxx, TEMP[1].xyzz >372: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[23].xxxx, TEMP[25].xyzz >373: FMA TEMP[26].xyz, TEMP[25].xyzz, TEMP[5].xxxx, TEMP[10].xyzz >374: MOV TEMP[18].xyz, TEMP[26].xyzx >375: DP4 TEMP[25].x, CONST[1][6], TEMP[18] >376: DP4 TEMP[27].x, CONST[1][7], TEMP[18] >377: MOV TEMP[25].y, TEMP[27].xxxx >378: DP4 TEMP[27].x, CONST[1][9], TEMP[18] >379: FSEQ TEMP[28].xy, TEMP[27].xxxx, IMM[2].wwww >380: SSG TEMP[29].xy, TEMP[25].xyyy >381: MUL TEMP[29].xy, IMM[5].xxxx, TEMP[29].xyyy >382: RCP TEMP[27].xy, TEMP[27].xxxx >383: MUL TEMP[27].xy, TEMP[25].xyyy, TEMP[27].xyyy >384: UCMP TEMP[18].xy, TEMP[28].xyyy, TEMP[29].xyyy, TEMP[27].xyyy >385: FMA TEMP[18].xy, TEMP[18].xyyy, IMM[5].yzzz, IMM[3].xxxx >386: MOV TEMP[27].xy, TEMP[18].xyyy >387: TEX TEMP[27].x, TEMP[27], SAMP[3], 2D >388: ADD TEMP[18].x, -TEMP[27].xxxx, TEMP[26].zzzz >389: FSLT TEMP[26].x, TEMP[18].xxxx, IMM[2].wwww >390: AND TEMP[26].x, TEMP[26].xxxx, IMM[6].xxxx >391: INEG TEMP[26].x, TEMP[26].xxxx >392: USNE TEMP[26].x, TEMP[26].xxxx, IMM[1].xxxx >393: UIF TEMP[26].xxxx :0 >394: MOV TEMP[26].x, TEMP[5].yyyy >395: ELSE :0 >396: MOV TEMP[26].x, TEMP[5].xxxx >397: ENDIF >398: MUL TEMP[27].x, TEMP[26].xxxx, IMM[3].xxxx >399: FSLT TEMP[28].x, IMM[2].wwww, TEMP[18].xxxx >400: AND TEMP[28].x, TEMP[28].xxxx, IMM[6].xxxx >401: INEG TEMP[28].x, TEMP[28].xxxx >402: AND TEMP[28].x, TEMP[14].xxxx, TEMP[28].xxxx >403: USNE TEMP[28].x, TEMP[28].xxxx, IMM[1].xxxx >404: UIF TEMP[28].xxxx :0 >405: MOV TEMP[27].x, TEMP[27].xxxx >406: ELSE :0 >407: MOV TEMP[27].x, TEMP[26].xxxx >408: ENDIF >409: ABS TEMP[26].x, TEMP[18].xxxx >410: ADD TEMP[26].x, -TEMP[27].xxxx, TEMP[26].xxxx >411: FSNE TEMP[28].x, TEMP[27].xxxx, IMM[2].wwww >412: UIF TEMP[28].xxxx :0 >413: RCP TEMP[27].x, TEMP[27].xxxx >414: MUL TEMP[27].x, TEMP[26].xxxx, TEMP[27].xxxx >415: ELSE :0 >416: SSG TEMP[26].x, TEMP[26].xxxx >417: MUL TEMP[27].x, IMM[5].xxxx, TEMP[26].xxxx >418: ENDIF >419: MOV_SAT TEMP[26].x, TEMP[27].xxxx >420: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[26].xxxx >421: MOV TEMP[18].w, IMM[0].xxxx 
>422: DP2 TEMP[5].x, TEMP[4].yxxx, IMM[5].wzzz >423: DP2 TEMP[26].x, TEMP[15].xyyy, IMM[5].wzzz >424: FMA TEMP[25].xyz, TEMP[7].xyzz, TEMP[26].xxxx, TEMP[1].xyzz >425: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[5].xxxx, TEMP[25].xyzz >426: FMA TEMP[27].xyz, TEMP[25].xyzz, TEMP[5].zzzz, TEMP[10].xyzz >427: MOV TEMP[18].xyz, TEMP[27].xyzx >428: DP4 TEMP[25].x, CONST[1][6], TEMP[18] >429: DP4 TEMP[28].x, CONST[1][7], TEMP[18] >430: MOV TEMP[25].y, TEMP[28].xxxx >431: DP4 TEMP[28].x, CONST[1][9], TEMP[18] >432: FSEQ TEMP[29].xy, TEMP[28].xxxx, IMM[2].wwww >433: SSG TEMP[30].xy, TEMP[25].xyyy >434: MUL TEMP[30].xy, IMM[5].xxxx, TEMP[30].xyyy >435: RCP TEMP[28].xy, TEMP[28].xxxx >436: MUL TEMP[28].xy, TEMP[25].xyyy, TEMP[28].xyyy >437: UCMP TEMP[18].xy, TEMP[29].xyyy, TEMP[30].xyyy, TEMP[28].xyyy >438: FMA TEMP[18].xy, TEMP[18].xyyy, IMM[5].yzzz, IMM[3].xxxx >439: MOV TEMP[28].xy, TEMP[18].xyyy >440: TEX TEMP[28].x, TEMP[28], SAMP[3], 2D >441: ADD TEMP[18].x, -TEMP[28].xxxx, TEMP[27].zzzz >442: FSLT TEMP[27].x, TEMP[18].xxxx, IMM[2].wwww >443: AND TEMP[27].x, TEMP[27].xxxx, IMM[6].xxxx >444: INEG TEMP[27].x, TEMP[27].xxxx >445: USNE TEMP[27].x, TEMP[27].xxxx, IMM[1].xxxx >446: UIF TEMP[27].xxxx :0 >447: MOV TEMP[27].x, TEMP[5].wwww >448: ELSE :0 >449: MOV TEMP[27].x, TEMP[5].zzzz >450: ENDIF >451: MUL TEMP[28].x, TEMP[27].xxxx, IMM[3].xxxx >452: FSLT TEMP[29].x, IMM[2].wwww, TEMP[18].xxxx >453: AND TEMP[29].x, TEMP[29].xxxx, IMM[6].xxxx >454: INEG TEMP[29].x, TEMP[29].xxxx >455: AND TEMP[29].x, TEMP[14].xxxx, TEMP[29].xxxx >456: USNE TEMP[29].x, TEMP[29].xxxx, IMM[1].xxxx >457: UIF TEMP[29].xxxx :0 >458: MOV TEMP[28].x, TEMP[28].xxxx >459: ELSE :0 >460: MOV TEMP[28].x, TEMP[27].xxxx >461: ENDIF >462: ABS TEMP[27].x, TEMP[18].xxxx >463: ADD TEMP[27].x, -TEMP[28].xxxx, TEMP[27].xxxx >464: FSNE TEMP[29].x, TEMP[28].xxxx, IMM[2].wwww >465: UIF TEMP[29].xxxx :0 >466: RCP TEMP[28].x, TEMP[28].xxxx >467: MUL TEMP[28].x, TEMP[27].xxxx, TEMP[28].xxxx >468: ELSE :0 >469: SSG TEMP[27].x, TEMP[27].xxxx >470: MUL TEMP[28].x, IMM[5].xxxx, TEMP[27].xxxx >471: ENDIF >472: MOV_SAT TEMP[27].x, TEMP[28].xxxx >473: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[27].xxxx >474: MUL TEMP[18], TEMP[9].xxxx, IMM[13] >475: MOV TEMP[25].w, IMM[0].xxxx >476: DP2 TEMP[27].x, TEMP[4].yxxx, IMM[14].xyyy >477: DP2 TEMP[28].x, TEMP[15].xyyy, IMM[14].xyyy >478: FMA TEMP[29].xyz, TEMP[7].xyzz, TEMP[28].xxxx, TEMP[1].xyzz >479: FMA TEMP[29].xyz, TEMP[3].xyzz, TEMP[27].xxxx, TEMP[29].xyzz >480: FMA TEMP[30].xyz, TEMP[29].xyzz, TEMP[18].xxxx, TEMP[10].xyzz >481: MOV TEMP[25].xyz, TEMP[30].xyzx >482: DP4 TEMP[29].x, CONST[1][6], TEMP[25] >483: DP4 TEMP[31].x, CONST[1][7], TEMP[25] >484: MOV TEMP[29].y, TEMP[31].xxxx >485: DP4 TEMP[31].x, CONST[1][9], TEMP[25] >486: FSEQ TEMP[32].xy, TEMP[31].xxxx, IMM[2].wwww >487: SSG TEMP[33].xy, TEMP[29].xyyy >488: MUL TEMP[33].xy, IMM[5].xxxx, TEMP[33].xyyy >489: RCP TEMP[31].xy, TEMP[31].xxxx >490: MUL TEMP[31].xy, TEMP[29].xyyy, TEMP[31].xyyy >491: UCMP TEMP[31].xy, TEMP[32].xyyy, TEMP[33].xyyy, TEMP[31].xyyy >492: FMA TEMP[31].xy, TEMP[31].xyyy, IMM[5].yzzz, IMM[3].xxxx >493: MOV TEMP[31].xy, TEMP[31].xyyy >494: TEX TEMP[31].x, TEMP[31], SAMP[3], 2D >495: ADD TEMP[30].x, -TEMP[31].xxxx, TEMP[30].zzzz >496: FSLT TEMP[31].x, TEMP[30].xxxx, IMM[2].wwww >497: AND TEMP[31].x, TEMP[31].xxxx, IMM[6].xxxx >498: INEG TEMP[31].x, TEMP[31].xxxx >499: USNE TEMP[31].x, TEMP[31].xxxx, IMM[1].xxxx >500: UIF TEMP[31].xxxx :0 >501: MOV TEMP[31].x, TEMP[18].yyyy >502: ELSE :0 >503: MOV TEMP[31].x, TEMP[18].xxxx >504: ENDIF >505: 
MUL TEMP[32].x, TEMP[31].xxxx, IMM[3].xxxx >506: FSLT TEMP[33].x, IMM[2].wwww, TEMP[30].xxxx >507: AND TEMP[33].x, TEMP[33].xxxx, IMM[6].xxxx >508: INEG TEMP[33].x, TEMP[33].xxxx >509: AND TEMP[33].x, TEMP[14].xxxx, TEMP[33].xxxx >510: USNE TEMP[33].x, TEMP[33].xxxx, IMM[1].xxxx >511: UIF TEMP[33].xxxx :0 >512: MOV TEMP[32].x, TEMP[32].xxxx >513: ELSE :0 >514: MOV TEMP[32].x, TEMP[31].xxxx >515: ENDIF >516: ABS TEMP[30].x, TEMP[30].xxxx >517: ADD TEMP[30].x, TEMP[30].xxxx, -TEMP[32].xxxx >518: FSNE TEMP[31].x, TEMP[32].xxxx, IMM[2].wwww >519: UIF TEMP[31].xxxx :0 >520: RCP TEMP[31].x, TEMP[32].xxxx >521: MUL TEMP[31].x, TEMP[30].xxxx, TEMP[31].xxxx >522: ELSE :0 >523: SSG TEMP[30].x, TEMP[30].xxxx >524: MUL TEMP[31].x, IMM[5].xxxx, TEMP[30].xxxx >525: ENDIF >526: MOV_SAT TEMP[30].x, TEMP[31].xxxx >527: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[30].xxxx >528: MOV TEMP[25].w, IMM[0].xxxx >529: DP2 TEMP[30].x, TEMP[4].yxxx, IMM[14].zwww >530: DP2 TEMP[18].x, TEMP[15].xyyy, IMM[14].zwww >531: FMA TEMP[29].xyz, TEMP[7].xyzz, TEMP[18].xxxx, TEMP[1].xyzz >532: FMA TEMP[29].xyz, TEMP[3].xyzz, TEMP[30].xxxx, TEMP[29].xyzz >533: FMA TEMP[31].xyz, TEMP[29].xyzz, TEMP[18].zzzz, TEMP[10].xyzz >534: MOV TEMP[25].xyz, TEMP[31].xyzx >535: DP4 TEMP[29].x, CONST[1][6], TEMP[25] >536: DP4 TEMP[32].x, CONST[1][7], TEMP[25] >537: MOV TEMP[29].y, TEMP[32].xxxx >538: DP4 TEMP[32].x, CONST[1][9], TEMP[25] >539: FSEQ TEMP[33].xy, TEMP[32].xxxx, IMM[2].wwww >540: SSG TEMP[34].xy, TEMP[29].xyyy >541: MUL TEMP[34].xy, IMM[5].xxxx, TEMP[34].xyyy >542: RCP TEMP[32].xy, TEMP[32].xxxx >543: MUL TEMP[32].xy, TEMP[29].xyyy, TEMP[32].xyyy >544: UCMP TEMP[32].xy, TEMP[33].xyyy, TEMP[34].xyyy, TEMP[32].xyyy >545: FMA TEMP[32].xy, TEMP[32].xyyy, IMM[5].yzzz, IMM[3].xxxx >546: MOV TEMP[32].xy, TEMP[32].xyyy >547: TEX TEMP[32].x, TEMP[32], SAMP[3], 2D >548: ADD TEMP[31].x, -TEMP[32].xxxx, TEMP[31].zzzz >549: FSLT TEMP[32].x, TEMP[31].xxxx, IMM[2].wwww >550: AND TEMP[32].x, TEMP[32].xxxx, IMM[6].xxxx >551: INEG TEMP[32].x, TEMP[32].xxxx >552: USNE TEMP[32].x, TEMP[32].xxxx, IMM[1].xxxx >553: UIF TEMP[32].xxxx :0 >554: MOV TEMP[32].x, TEMP[18].wwww >555: ELSE :0 >556: MOV TEMP[32].x, TEMP[18].zzzz >557: ENDIF >558: MUL TEMP[33].x, TEMP[32].xxxx, IMM[3].xxxx >559: FSLT TEMP[34].x, IMM[2].wwww, TEMP[31].xxxx >560: AND TEMP[34].x, TEMP[34].xxxx, IMM[6].xxxx >561: INEG TEMP[34].x, TEMP[34].xxxx >562: AND TEMP[34].x, TEMP[14].xxxx, TEMP[34].xxxx >563: USNE TEMP[34].x, TEMP[34].xxxx, IMM[1].xxxx >564: UIF TEMP[34].xxxx :0 >565: MOV TEMP[33].x, TEMP[33].xxxx >566: ELSE :0 >567: MOV TEMP[33].x, TEMP[32].xxxx >568: ENDIF >569: ABS TEMP[31].x, TEMP[31].xxxx >570: ADD TEMP[31].x, -TEMP[33].xxxx, TEMP[31].xxxx >571: FSNE TEMP[32].x, TEMP[33].xxxx, IMM[2].wwww >572: UIF TEMP[32].xxxx :0 >573: RCP TEMP[32].x, TEMP[33].xxxx >574: MUL TEMP[32].x, TEMP[31].xxxx, TEMP[32].xxxx >575: ELSE :0 >576: SSG TEMP[31].x, TEMP[31].xxxx >577: MUL TEMP[32].x, IMM[5].xxxx, TEMP[31].xxxx >578: ENDIF >579: MOV_SAT TEMP[31].x, TEMP[32].xxxx >580: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[31].xxxx >581: MOV TEMP[25].w, IMM[0].xxxx >582: DP2 TEMP[31].x, TEMP[4].yxxx, IMM[7].wwww >583: DP2 TEMP[32].x, TEMP[15].xyyy, IMM[7].wwww >584: FMA TEMP[29].xyz, TEMP[7].xyzz, TEMP[32].xxxx, TEMP[1].xyzz >585: FMA TEMP[29].xyz, TEMP[3].xyzz, TEMP[31].xxxx, TEMP[29].xyzz >586: FMA TEMP[33].xyz, TEMP[29].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >587: MOV TEMP[25].xyz, TEMP[33].xyzx >588: DP4 TEMP[29].x, CONST[1][6], TEMP[25] >589: DP4 TEMP[34].x, CONST[1][7], TEMP[25] >590: MOV TEMP[29].y, 
TEMP[34].xxxx >591: DP4 TEMP[34].x, CONST[1][9], TEMP[25] >592: FSEQ TEMP[35].xy, TEMP[34].xxxx, IMM[2].wwww >593: SSG TEMP[36].xy, TEMP[29].xyyy >594: MUL TEMP[36].xy, IMM[5].xxxx, TEMP[36].xyyy >595: RCP TEMP[34].xy, TEMP[34].xxxx >596: MUL TEMP[34].xy, TEMP[29].xyyy, TEMP[34].xyyy >597: UCMP TEMP[34].xy, TEMP[35].xyyy, TEMP[36].xyyy, TEMP[34].xyyy >598: FMA TEMP[34].xy, TEMP[34].xyyy, IMM[5].yzzz, IMM[3].xxxx >599: MOV TEMP[34].xy, TEMP[34].xyyy >600: TEX TEMP[34].x, TEMP[34], SAMP[3], 2D >601: ADD TEMP[33].x, -TEMP[34].xxxx, TEMP[33].zzzz >602: FSLT TEMP[34].x, TEMP[33].xxxx, IMM[2].wwww >603: AND TEMP[34].x, TEMP[34].xxxx, IMM[6].xxxx >604: INEG TEMP[34].x, TEMP[34].xxxx >605: USNE TEMP[34].x, TEMP[34].xxxx, IMM[1].xxxx >606: UIF TEMP[34].xxxx :0 >607: MOV TEMP[34].x, TEMP[11].yyyy >608: ELSE :0 >609: MOV TEMP[34].x, TEMP[11].xxxx >610: ENDIF >611: MUL TEMP[35].x, TEMP[34].xxxx, IMM[3].xxxx >612: FSLT TEMP[36].x, IMM[2].wwww, TEMP[33].xxxx >613: AND TEMP[36].x, TEMP[36].xxxx, IMM[6].xxxx >614: INEG TEMP[36].x, TEMP[36].xxxx >615: AND TEMP[36].x, TEMP[14].xxxx, TEMP[36].xxxx >616: USNE TEMP[36].x, TEMP[36].xxxx, IMM[1].xxxx >617: UIF TEMP[36].xxxx :0 >618: MOV TEMP[35].x, TEMP[35].xxxx >619: ELSE :0 >620: MOV TEMP[35].x, TEMP[34].xxxx >621: ENDIF >622: ABS TEMP[33].x, TEMP[33].xxxx >623: ADD TEMP[33].x, -TEMP[35].xxxx, TEMP[33].xxxx >624: FSNE TEMP[34].x, TEMP[35].xxxx, IMM[2].wwww >625: UIF TEMP[34].xxxx :0 >626: RCP TEMP[34].x, TEMP[35].xxxx >627: MUL TEMP[34].x, TEMP[33].xxxx, TEMP[34].xxxx >628: ELSE :0 >629: SSG TEMP[33].x, TEMP[33].xxxx >630: MUL TEMP[34].x, IMM[5].xxxx, TEMP[33].xxxx >631: ENDIF >632: MOV_SAT TEMP[33].x, TEMP[34].xxxx >633: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[33].xxxx >634: MOV TEMP[25].w, IMM[0].xxxx >635: FMA TEMP[29].xyz, TEMP[7].xyzz, TEMP[13].wwww, TEMP[1].xyzz >636: FMA TEMP[29].xyz, TEMP[3].xyzz, TEMP[13].zzzz, TEMP[29].xyzz >637: FMA TEMP[33].xyz, TEMP[29].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >638: MOV TEMP[25].xyz, TEMP[33].xyzx >639: DP4 TEMP[11].x, CONST[1][6], TEMP[25] >640: DP4 TEMP[34].x, CONST[1][7], TEMP[25] >641: MOV TEMP[11].y, TEMP[34].xxxx >642: DP4 TEMP[34].x, CONST[1][9], TEMP[25] >643: FSEQ TEMP[35].xy, TEMP[34].xxxx, IMM[2].wwww >644: SSG TEMP[36].xy, TEMP[11].xyyy >645: MUL TEMP[36].xy, IMM[5].xxxx, TEMP[36].xyyy >646: RCP TEMP[34].xy, TEMP[34].xxxx >647: MUL TEMP[34].xy, TEMP[11].xyyy, TEMP[34].xyyy >648: UCMP TEMP[11].xy, TEMP[35].xyyy, TEMP[36].xyyy, TEMP[34].xyyy >649: FMA TEMP[11].xy, TEMP[11].xyyy, IMM[5].yzzz, IMM[3].xxxx >650: MOV TEMP[34].xy, TEMP[11].xyyy >651: TEX TEMP[34].x, TEMP[34], SAMP[3], 2D >652: ADD TEMP[11].x, -TEMP[34].xxxx, TEMP[33].zzzz >653: FSLT TEMP[33].x, TEMP[11].xxxx, IMM[2].wwww >654: AND TEMP[33].x, TEMP[33].xxxx, IMM[6].xxxx >655: INEG TEMP[33].x, TEMP[33].xxxx >656: USNE TEMP[33].x, TEMP[33].xxxx, IMM[1].xxxx >657: UIF TEMP[33].xxxx :0 >658: MOV TEMP[33].x, TEMP[11].wwww >659: ELSE :0 >660: MOV TEMP[33].x, TEMP[11].zzzz >661: ENDIF >662: MUL TEMP[34].x, TEMP[33].xxxx, IMM[3].xxxx >663: FSLT TEMP[35].x, IMM[2].wwww, TEMP[11].xxxx >664: AND TEMP[35].x, TEMP[35].xxxx, IMM[6].xxxx >665: INEG TEMP[35].x, TEMP[35].xxxx >666: AND TEMP[35].x, TEMP[14].xxxx, TEMP[35].xxxx >667: USNE TEMP[35].x, TEMP[35].xxxx, IMM[1].xxxx >668: UIF TEMP[35].xxxx :0 >669: MOV TEMP[34].x, TEMP[34].xxxx >670: ELSE :0 >671: MOV TEMP[34].x, TEMP[33].xxxx >672: ENDIF >673: ABS TEMP[33].x, TEMP[11].xxxx >674: ADD TEMP[11].x, -TEMP[34].xxxx, TEMP[33].xxxx >675: FSNE TEMP[33].x, TEMP[34].xxxx, IMM[2].wwww >676: UIF TEMP[33].xxxx :0 >677: RCP 
TEMP[33].x, TEMP[34].xxxx >678: MUL TEMP[33].x, TEMP[11].xxxx, TEMP[33].xxxx >679: ELSE :0 >680: SSG TEMP[34].x, TEMP[11].xxxx >681: MUL TEMP[33].x, IMM[5].xxxx, TEMP[34].xxxx >682: ENDIF >683: MOV_SAT TEMP[33].x, TEMP[33].xxxx >684: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[33].xxxx >685: MOV TEMP[11].w, IMM[0].xxxx >686: DP2 TEMP[33].x, TEMP[4].yxxx, IMM[15].xyyy >687: DP2 TEMP[34].x, TEMP[15].xyyy, IMM[15].xyyy >688: FMA TEMP[25].xyz, TEMP[7].xyzz, TEMP[34].xxxx, TEMP[1].xyzz >689: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[33].xxxx, TEMP[25].xyzz >690: FMA TEMP[35].xyz, TEMP[25].xyzz, TEMP[6].xxxx, TEMP[10].xyzz >691: MOV TEMP[11].xyz, TEMP[35].xyzx >692: DP4 TEMP[25].x, CONST[1][6], TEMP[11] >693: DP4 TEMP[36].x, CONST[1][7], TEMP[11] >694: MOV TEMP[25].y, TEMP[36].xxxx >695: DP4 TEMP[36].x, CONST[1][9], TEMP[11] >696: FSEQ TEMP[37].xy, TEMP[36].xxxx, IMM[2].wwww >697: SSG TEMP[38].xy, TEMP[25].xyyy >698: MUL TEMP[38].xy, IMM[5].xxxx, TEMP[38].xyyy >699: RCP TEMP[36].xy, TEMP[36].xxxx >700: MUL TEMP[36].xy, TEMP[25].xyyy, TEMP[36].xyyy >701: UCMP TEMP[11].xy, TEMP[37].xyyy, TEMP[38].xyyy, TEMP[36].xyyy >702: FMA TEMP[11].xy, TEMP[11].xyyy, IMM[5].yzzz, IMM[3].xxxx >703: MOV TEMP[36].xy, TEMP[11].xyyy >704: TEX TEMP[36].x, TEMP[36], SAMP[3], 2D >705: ADD TEMP[11].x, -TEMP[36].xxxx, TEMP[35].zzzz >706: FSLT TEMP[35].x, TEMP[11].xxxx, IMM[2].wwww >707: AND TEMP[35].x, TEMP[35].xxxx, IMM[6].xxxx >708: INEG TEMP[35].x, TEMP[35].xxxx >709: USNE TEMP[35].x, TEMP[35].xxxx, IMM[1].xxxx >710: UIF TEMP[35].xxxx :0 >711: MOV TEMP[35].x, TEMP[6].yyyy >712: ELSE :0 >713: MOV TEMP[35].x, TEMP[6].xxxx >714: ENDIF >715: MUL TEMP[36].x, TEMP[35].xxxx, IMM[3].xxxx >716: FSLT TEMP[37].x, IMM[2].wwww, TEMP[11].xxxx >717: AND TEMP[37].x, TEMP[37].xxxx, IMM[6].xxxx >718: INEG TEMP[37].x, TEMP[37].xxxx >719: AND TEMP[37].x, TEMP[14].xxxx, TEMP[37].xxxx >720: USNE TEMP[37].x, TEMP[37].xxxx, IMM[1].xxxx >721: UIF TEMP[37].xxxx :0 >722: MOV TEMP[36].x, TEMP[36].xxxx >723: ELSE :0 >724: MOV TEMP[36].x, TEMP[35].xxxx >725: ENDIF >726: ABS TEMP[35].x, TEMP[11].xxxx >727: ADD TEMP[11].x, -TEMP[36].xxxx, TEMP[35].xxxx >728: FSNE TEMP[35].x, TEMP[36].xxxx, IMM[2].wwww >729: UIF TEMP[35].xxxx :0 >730: RCP TEMP[35].x, TEMP[36].xxxx >731: MUL TEMP[35].x, TEMP[11].xxxx, TEMP[35].xxxx >732: ELSE :0 >733: SSG TEMP[36].x, TEMP[11].xxxx >734: MUL TEMP[35].x, IMM[5].xxxx, TEMP[36].xxxx >735: ENDIF >736: MOV_SAT TEMP[35].x, TEMP[35].xxxx >737: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[35].xxxx >738: MOV TEMP[11].w, IMM[0].xxxx >739: DP2 TEMP[6].x, TEMP[4].yxxx, IMM[15].zwww >740: DP2 TEMP[35].x, TEMP[15].xyyy, IMM[15].zwww >741: FMA TEMP[25].xyz, TEMP[7].xyzz, TEMP[35].xxxx, TEMP[1].xyzz >742: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[6].xxxx, TEMP[25].xyzz >743: FMA TEMP[36].xyz, TEMP[25].xyzz, TEMP[6].zzzz, TEMP[10].xyzz >744: MOV TEMP[11].xyz, TEMP[36].xyzx >745: DP4 TEMP[25].x, CONST[1][6], TEMP[11] >746: DP4 TEMP[37].x, CONST[1][7], TEMP[11] >747: MOV TEMP[25].y, TEMP[37].xxxx >748: DP4 TEMP[37].x, CONST[1][9], TEMP[11] >749: FSEQ TEMP[38].xy, TEMP[37].xxxx, IMM[2].wwww >750: SSG TEMP[39].xy, TEMP[25].xyyy >751: MUL TEMP[39].xy, IMM[5].xxxx, TEMP[39].xyyy >752: RCP TEMP[37].xy, TEMP[37].xxxx >753: MUL TEMP[37].xy, TEMP[25].xyyy, TEMP[37].xyyy >754: UCMP TEMP[11].xy, TEMP[38].xyyy, TEMP[39].xyyy, TEMP[37].xyyy >755: FMA TEMP[11].xy, TEMP[11].xyyy, IMM[5].yzzz, IMM[3].xxxx >756: MOV TEMP[37].xy, TEMP[11].xyyy >757: TEX TEMP[37].x, TEMP[37], SAMP[3], 2D >758: ADD TEMP[11].x, -TEMP[37].xxxx, TEMP[36].zzzz >759: FSLT TEMP[36].x, TEMP[11].xxxx, 
IMM[2].wwww >760: AND TEMP[36].x, TEMP[36].xxxx, IMM[6].xxxx >761: INEG TEMP[36].x, TEMP[36].xxxx >762: USNE TEMP[36].x, TEMP[36].xxxx, IMM[1].xxxx >763: UIF TEMP[36].xxxx :0 >764: MOV TEMP[36].x, TEMP[6].wwww >765: ELSE :0 >766: MOV TEMP[36].x, TEMP[6].zzzz >767: ENDIF >768: MUL TEMP[37].x, TEMP[36].xxxx, IMM[3].xxxx >769: FSLT TEMP[38].x, IMM[2].wwww, TEMP[11].xxxx >770: AND TEMP[38].x, TEMP[38].xxxx, IMM[6].xxxx >771: INEG TEMP[38].x, TEMP[38].xxxx >772: AND TEMP[38].x, TEMP[14].xxxx, TEMP[38].xxxx >773: USNE TEMP[38].x, TEMP[38].xxxx, IMM[1].xxxx >774: UIF TEMP[38].xxxx :0 >775: MOV TEMP[37].x, TEMP[37].xxxx >776: ELSE :0 >777: MOV TEMP[37].x, TEMP[36].xxxx >778: ENDIF >779: ABS TEMP[36].x, TEMP[11].xxxx >780: ADD TEMP[11].x, -TEMP[37].xxxx, TEMP[36].xxxx >781: FSNE TEMP[36].x, TEMP[37].xxxx, IMM[2].wwww >782: UIF TEMP[36].xxxx :0 >783: RCP TEMP[36].x, TEMP[37].xxxx >784: MUL TEMP[36].x, TEMP[11].xxxx, TEMP[36].xxxx >785: ELSE :0 >786: SSG TEMP[37].x, TEMP[11].xxxx >787: MUL TEMP[36].x, IMM[5].xxxx, TEMP[37].xxxx >788: ENDIF >789: MOV_SAT TEMP[36].x, TEMP[36].xxxx >790: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[36].xxxx >791: MOV TEMP[11].w, IMM[0].xxxx >792: DP2 TEMP[36].x, TEMP[4].yxxx, IMM[16].xyyy >793: DP2 TEMP[4].x, TEMP[4].yxxx, IMM[5].zzzz >794: DP2 TEMP[37].x, TEMP[15].xyyy, IMM[16].xyyy >795: FMA TEMP[25].xyz, TEMP[7].xyzz, TEMP[37].xxxx, TEMP[1].xyzz >796: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[36].xxxx, TEMP[25].xyzz >797: FMA TEMP[38].xyz, TEMP[25].xyzz, TEMP[21].xxxx, TEMP[10].xyzz >798: MOV TEMP[11].xyz, TEMP[38].xyzx >799: DP4 TEMP[25].x, CONST[1][6], TEMP[11] >800: DP4 TEMP[39].x, CONST[1][7], TEMP[11] >801: MOV TEMP[25].y, TEMP[39].xxxx >802: DP4 TEMP[39].x, CONST[1][9], TEMP[11] >803: FSEQ TEMP[40].xy, TEMP[39].xxxx, IMM[2].wwww >804: SSG TEMP[41].xy, TEMP[25].xyyy >805: MUL TEMP[41].xy, IMM[5].xxxx, TEMP[41].xyyy >806: RCP TEMP[39].xy, TEMP[39].xxxx >807: MUL TEMP[39].xy, TEMP[25].xyyy, TEMP[39].xyyy >808: UCMP TEMP[11].xy, TEMP[40].xyyy, TEMP[41].xyyy, TEMP[39].xyyy >809: FMA TEMP[11].xy, TEMP[11].xyyy, IMM[5].yzzz, IMM[3].xxxx >810: MOV TEMP[39].xy, TEMP[11].xyyy >811: TEX TEMP[39].x, TEMP[39], SAMP[3], 2D >812: ADD TEMP[11].x, -TEMP[39].xxxx, TEMP[38].zzzz >813: FSLT TEMP[38].x, TEMP[11].xxxx, IMM[2].wwww >814: AND TEMP[38].x, TEMP[38].xxxx, IMM[6].xxxx >815: INEG TEMP[38].x, TEMP[38].xxxx >816: USNE TEMP[38].x, TEMP[38].xxxx, IMM[1].xxxx >817: UIF TEMP[38].xxxx :0 >818: MOV TEMP[38].x, TEMP[21].yyyy >819: ELSE :0 >820: MOV TEMP[38].x, TEMP[21].xxxx >821: ENDIF >822: MUL TEMP[39].x, TEMP[38].xxxx, IMM[3].xxxx >823: FSLT TEMP[40].x, IMM[2].wwww, TEMP[11].xxxx >824: AND TEMP[40].x, TEMP[40].xxxx, IMM[6].xxxx >825: INEG TEMP[40].x, TEMP[40].xxxx >826: AND TEMP[40].x, TEMP[14].xxxx, TEMP[40].xxxx >827: USNE TEMP[40].x, TEMP[40].xxxx, IMM[1].xxxx >828: UIF TEMP[40].xxxx :0 >829: MOV TEMP[39].x, TEMP[39].xxxx >830: ELSE :0 >831: MOV TEMP[39].x, TEMP[38].xxxx >832: ENDIF >833: ABS TEMP[38].x, TEMP[11].xxxx >834: ADD TEMP[11].x, -TEMP[39].xxxx, TEMP[38].xxxx >835: FSNE TEMP[38].x, TEMP[39].xxxx, IMM[2].wwww >836: UIF TEMP[38].xxxx :0 >837: RCP TEMP[38].x, TEMP[39].xxxx >838: MUL TEMP[38].x, TEMP[11].xxxx, TEMP[38].xxxx >839: ELSE :0 >840: SSG TEMP[39].x, TEMP[11].xxxx >841: MUL TEMP[38].x, IMM[5].xxxx, TEMP[39].xxxx >842: ENDIF >843: MOV_SAT TEMP[38].x, TEMP[38].xxxx >844: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[38].xxxx >845: MOV TEMP[11].w, IMM[0].xxxx >846: DP2 TEMP[38].x, TEMP[15].xyyy, IMM[5].zzzz >847: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[38].xxxx, TEMP[1].xyzz >848: FMA 
TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xxxx, TEMP[1].xyzz >849: FMA TEMP[39].xyz, TEMP[1].xyzz, TEMP[21].zzzz, TEMP[10].xyzz >850: MOV TEMP[11].xyz, TEMP[39].xyzx >851: DP4 TEMP[1].x, CONST[1][6], TEMP[11] >852: DP4 TEMP[40].x, CONST[1][7], TEMP[11] >853: MOV TEMP[1].y, TEMP[40].xxxx >854: DP4 TEMP[40].x, CONST[1][9], TEMP[11] >855: FSEQ TEMP[41].xy, TEMP[40].xxxx, IMM[2].wwww >856: SSG TEMP[42].xy, TEMP[1].xyyy >857: MUL TEMP[42].xy, IMM[5].xxxx, TEMP[42].xyyy >858: RCP TEMP[40].xy, TEMP[40].xxxx >859: MUL TEMP[40].xy, TEMP[1].xyyy, TEMP[40].xyyy >860: UCMP TEMP[1].xy, TEMP[41].xyyy, TEMP[42].xyyy, TEMP[40].xyyy >861: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >862: MOV TEMP[40].xy, TEMP[1].xyyy >863: TEX TEMP[40].x, TEMP[40], SAMP[3], 2D >864: ADD TEMP[1].x, -TEMP[40].xxxx, TEMP[39].zzzz >865: FSLT TEMP[39].x, IMM[2].wwww, TEMP[1].xxxx >866: AND TEMP[39].x, TEMP[39].xxxx, IMM[6].xxxx >867: INEG TEMP[39].x, TEMP[39].xxxx >868: AND TEMP[14].x, TEMP[14].xxxx, TEMP[39].xxxx >869: FSLT TEMP[39].x, TEMP[1].xxxx, IMM[2].wwww >870: AND TEMP[39].x, TEMP[39].xxxx, IMM[6].xxxx >871: INEG TEMP[39].x, TEMP[39].xxxx >872: USNE TEMP[39].x, TEMP[39].xxxx, IMM[1].xxxx >873: UIF TEMP[39].xxxx :0 >874: MOV TEMP[39].x, TEMP[21].wwww >875: ELSE :0 >876: MOV TEMP[39].x, TEMP[21].zzzz >877: ENDIF >878: MUL TEMP[40].x, TEMP[39].xxxx, IMM[3].xxxx >879: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >880: UIF TEMP[14].xxxx :0 >881: MOV TEMP[14].x, TEMP[40].xxxx >882: ELSE :0 >883: MOV TEMP[14].x, TEMP[39].xxxx >884: ENDIF >885: ABS TEMP[39].x, TEMP[1].xxxx >886: ADD TEMP[1].x, -TEMP[14].xxxx, TEMP[39].xxxx >887: FSNE TEMP[39].x, TEMP[14].xxxx, IMM[2].wwww >888: UIF TEMP[39].xxxx :0 >889: RCP TEMP[14].x, TEMP[14].xxxx >890: MUL TEMP[14].x, TEMP[1].xxxx, TEMP[14].xxxx >891: ELSE :0 >892: SSG TEMP[39].x, TEMP[1].xxxx >893: MUL TEMP[14].x, IMM[5].xxxx, TEMP[39].xxxx >894: ENDIF >895: MOV_SAT TEMP[14].x, TEMP[14].xxxx >896: ADD TEMP[0].x, TEMP[14].xxxx, TEMP[0].xxxx >897: MUL TEMP[0].x, TEMP[0].xxxx, IMM[16].zzzz >898: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx >899: MOV TEMP[11].w, IMM[0].xxxx >900: DP2 TEMP[14].x, TEMP[15].xyyy, IMM[0].wwww >901: MUL TEMP[1].xyz, TEMP[2].zxyy, TEMP[3].yzxx >902: FMA TEMP[1].xyz, TEMP[2].yzxx, TEMP[3].zxyy, -TEMP[1].xyzz >903: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[14].xxxx, TEMP[2].xyzz >904: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[8].xxxx, TEMP[7].xyzz >905: MUL TEMP[21], TEMP[9].yyyy, IMM[3] >906: FMA TEMP[8].xyz, TEMP[7].xyzz, TEMP[21].xxxx, TEMP[10].xyzz >907: MOV TEMP[11].xyz, TEMP[8].xyzx >908: DP4 TEMP[7].x, CONST[1][6], TEMP[11] >909: DP4 TEMP[14].x, CONST[1][7], TEMP[11] >910: MOV TEMP[7].y, TEMP[14].xxxx >911: DP4 TEMP[14].x, CONST[1][9], TEMP[11] >912: FSEQ TEMP[39].xy, TEMP[14].xxxx, IMM[2].wwww >913: SSG TEMP[40].xy, TEMP[7].xyyy >914: MUL TEMP[40].xy, IMM[5].xxxx, TEMP[40].xyyy >915: RCP TEMP[14].xy, TEMP[14].xxxx >916: MUL TEMP[14].xy, TEMP[7].xyyy, TEMP[14].xyyy >917: UCMP TEMP[7].xy, TEMP[39].xyyy, TEMP[40].xyyy, TEMP[14].xyyy >918: FMA TEMP[7].xy, TEMP[7].xyyy, IMM[5].yzzz, IMM[3].xxxx >919: MOV TEMP[14].xy, TEMP[7].xyyy >920: TEX TEMP[14].x, TEMP[14], SAMP[3], 2D >921: ADD TEMP[8].x, -TEMP[14].xxxx, TEMP[8].zzzz >922: FSLT TEMP[14].x, TEMP[8].xxxx, IMM[2].wwww >923: AND TEMP[14].x, TEMP[14].xxxx, IMM[6].xxxx >924: INEG TEMP[14].x, TEMP[14].xxxx >925: USNE TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx >926: UIF TEMP[14].xxxx :0 >927: MOV TEMP[14].x, TEMP[21].yyyy >928: ELSE :0 >929: MOV TEMP[14].x, TEMP[21].xxxx >930: ENDIF >931: ABS TEMP[8].x, TEMP[8].xxxx >932: ADD 
TEMP[8].x, TEMP[8].xxxx, -TEMP[14].xxxx >933: FSNE TEMP[39].x, TEMP[14].xxxx, IMM[2].wwww >934: UIF TEMP[39].xxxx :0 >935: RCP TEMP[14].x, TEMP[14].xxxx >936: MUL TEMP[14].x, TEMP[8].xxxx, TEMP[14].xxxx >937: ELSE :0 >938: SSG TEMP[8].x, TEMP[8].xxxx >939: MUL TEMP[14].x, IMM[5].xxxx, TEMP[8].xxxx >940: ENDIF >941: MOV_SAT TEMP[8].x, TEMP[14].xxxx >942: MOV TEMP[11].w, IMM[0].xxxx >943: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[13].yyyy, TEMP[2].xyzz >944: FMA TEMP[25].xyz, TEMP[1].xyzz, TEMP[13].wwww, TEMP[2].xyzz >945: FMA TEMP[25].xyz, TEMP[3].xyzz, TEMP[15].wwww, TEMP[25].xyzz >946: FMA TEMP[14].xyz, TEMP[25].xyzz, TEMP[21].zzzz, TEMP[10].xyzz >947: MOV TEMP[25].xyz, TEMP[14].xyzx >948: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[15].zzzz, TEMP[7].xyzz >949: DP2 TEMP[39].x, TEMP[15].xyyy, IMM[2].xyyy >950: FMA TEMP[15].xyz, TEMP[1].xyzz, TEMP[39].xxxx, TEMP[2].xyzz >951: FMA TEMP[15].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[15].xyzz >952: FMA TEMP[12].xyz, TEMP[7].xyzz, TEMP[21].zzzz, TEMP[10].xyzz >953: MOV TEMP[11].xyz, TEMP[12].xyzx >954: DP4 TEMP[7].x, CONST[1][6], TEMP[11] >955: DP4 TEMP[39].x, CONST[1][7], TEMP[11] >956: MOV TEMP[7].y, TEMP[39].xxxx >957: DP4 TEMP[39].x, CONST[1][9], TEMP[11] >958: FSEQ TEMP[40].xy, TEMP[39].xxxx, IMM[2].wwww >959: SSG TEMP[41].xy, TEMP[7].xyyy >960: MUL TEMP[41].xy, IMM[5].xxxx, TEMP[41].xyyy >961: RCP TEMP[39].xy, TEMP[39].xxxx >962: MUL TEMP[39].xy, TEMP[7].xyyy, TEMP[39].xyyy >963: UCMP TEMP[7].xy, TEMP[40].xyyy, TEMP[41].xyyy, TEMP[39].xyyy >964: FMA TEMP[7].xy, TEMP[7].xyyy, IMM[5].yzzz, IMM[3].xxxx >965: MOV TEMP[39].xy, TEMP[7].xyyy >966: TEX TEMP[39].x, TEMP[39], SAMP[3], 2D >967: ADD TEMP[12].x, -TEMP[39].xxxx, TEMP[12].zzzz >968: FSLT TEMP[39].x, TEMP[12].xxxx, IMM[2].wwww >969: AND TEMP[39].x, TEMP[39].xxxx, IMM[6].xxxx >970: INEG TEMP[39].x, TEMP[39].xxxx >971: USNE TEMP[39].x, TEMP[39].xxxx, IMM[1].xxxx >972: UIF TEMP[39].xxxx :0 >973: MOV TEMP[39].x, TEMP[21].wwww >974: ELSE :0 >975: MOV TEMP[39].x, TEMP[21].zzzz >976: ENDIF >977: ABS TEMP[12].x, TEMP[12].xxxx >978: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[39].xxxx >979: FSNE TEMP[40].x, TEMP[39].xxxx, IMM[2].wwww >980: UIF TEMP[40].xxxx :0 >981: RCP TEMP[39].x, TEMP[39].xxxx >982: MUL TEMP[39].x, TEMP[12].xxxx, TEMP[39].xxxx >983: ELSE :0 >984: SSG TEMP[12].x, TEMP[12].xxxx >985: MUL TEMP[39].x, IMM[5].xxxx, TEMP[12].xxxx >986: ENDIF >987: MOV_SAT TEMP[12].x, TEMP[39].xxxx >988: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >989: MUL TEMP[11], TEMP[9].yyyy, IMM[8] >990: FMA TEMP[12].xyz, TEMP[15].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >991: MOV TEMP[15].xyz, TEMP[12].xyzx >992: MOV TEMP[15].w, IMM[0].xxxx >993: DP4 TEMP[7].x, CONST[1][6], TEMP[15] >994: DP4 TEMP[39].x, CONST[1][7], TEMP[15] >995: MOV TEMP[7].y, TEMP[39].xxxx >996: DP4 TEMP[39].x, CONST[1][9], TEMP[15] >997: FSEQ TEMP[40].xy, TEMP[39].xxxx, IMM[2].wwww >998: SSG TEMP[41].xy, TEMP[7].xyyy >999: MUL TEMP[41].xy, IMM[5].xxxx, TEMP[41].xyyy >1000: RCP TEMP[39].xy, TEMP[39].xxxx >1001: MUL TEMP[39].xy, TEMP[7].xyyy, TEMP[39].xyyy >1002: UCMP TEMP[7].xy, TEMP[40].xyyy, TEMP[41].xyyy, TEMP[39].xyyy >1003: FMA TEMP[7].xy, TEMP[7].xyyy, IMM[5].yzzz, IMM[3].xxxx >1004: MOV TEMP[39].xy, TEMP[7].xyyy >1005: TEX TEMP[39].x, TEMP[39], SAMP[3], 2D >1006: ADD TEMP[12].x, -TEMP[39].xxxx, TEMP[12].zzzz >1007: FSLT TEMP[39].x, TEMP[12].xxxx, IMM[2].wwww >1008: AND TEMP[39].x, TEMP[39].xxxx, IMM[6].xxxx >1009: INEG TEMP[39].x, TEMP[39].xxxx >1010: USNE TEMP[39].x, TEMP[39].xxxx, IMM[1].xxxx >1011: UIF TEMP[39].xxxx :0 >1012: MOV TEMP[39].x, TEMP[11].yyyy >1013: 
ELSE :0 >1014: MOV TEMP[39].x, TEMP[11].xxxx >1015: ENDIF >1016: ABS TEMP[12].x, TEMP[12].xxxx >1017: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[39].xxxx >1018: FSNE TEMP[40].x, TEMP[39].xxxx, IMM[2].wwww >1019: UIF TEMP[40].xxxx :0 >1020: RCP TEMP[39].x, TEMP[39].xxxx >1021: MUL TEMP[39].x, TEMP[12].xxxx, TEMP[39].xxxx >1022: ELSE :0 >1023: SSG TEMP[12].x, TEMP[12].xxxx >1024: MUL TEMP[39].x, IMM[5].xxxx, TEMP[12].xxxx >1025: ENDIF >1026: MOV_SAT TEMP[12].x, TEMP[39].xxxx >1027: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >1028: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[17].xxxx, TEMP[2].xyzz >1029: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[16].xxxx, TEMP[7].xyzz >1030: FMA TEMP[12].xyz, TEMP[7].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >1031: MOV TEMP[15].xyz, TEMP[12].xyzx >1032: MOV TEMP[15].w, IMM[0].xxxx >1033: DP4 TEMP[7].x, CONST[1][6], TEMP[15] >1034: DP4 TEMP[16].x, CONST[1][7], TEMP[15] >1035: MOV TEMP[7].y, TEMP[16].xxxx >1036: DP4 TEMP[16].x, CONST[1][9], TEMP[15] >1037: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >1038: SSG TEMP[39].xy, TEMP[7].xyyy >1039: MUL TEMP[39].xy, IMM[5].xxxx, TEMP[39].xyyy >1040: RCP TEMP[16].xy, TEMP[16].xxxx >1041: MUL TEMP[16].xy, TEMP[7].xyyy, TEMP[16].xyyy >1042: UCMP TEMP[16].xy, TEMP[17].xyyy, TEMP[39].xyyy, TEMP[16].xyyy >1043: FMA TEMP[16].xy, TEMP[16].xyyy, IMM[5].yzzz, IMM[3].xxxx >1044: MOV TEMP[16].xy, TEMP[16].xyyy >1045: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >1046: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].zzzz >1047: FSLT TEMP[16].x, TEMP[12].xxxx, IMM[2].wwww >1048: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >1049: INEG TEMP[16].x, TEMP[16].xxxx >1050: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >1051: UIF TEMP[16].xxxx :0 >1052: MOV TEMP[16].x, TEMP[11].wwww >1053: ELSE :0 >1054: MOV TEMP[16].x, TEMP[11].zzzz >1055: ENDIF >1056: ABS TEMP[12].x, TEMP[12].xxxx >1057: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].xxxx >1058: FSNE TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >1059: UIF TEMP[17].xxxx :0 >1060: RCP TEMP[16].x, TEMP[16].xxxx >1061: MUL TEMP[16].x, TEMP[12].xxxx, TEMP[16].xxxx >1062: ELSE :0 >1063: SSG TEMP[12].x, TEMP[12].xxxx >1064: MUL TEMP[16].x, IMM[5].xxxx, TEMP[12].xxxx >1065: ENDIF >1066: MOV_SAT TEMP[12].x, TEMP[16].xxxx >1067: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >1068: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[20].xxxx, TEMP[2].xyzz >1069: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[19].xxxx, TEMP[7].xyzz >1070: MUL TEMP[15], TEMP[9].yyyy, IMM[11] >1071: FMA TEMP[12].xyz, TEMP[7].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >1072: MOV TEMP[7].xyz, TEMP[12].xyzx >1073: MOV TEMP[7].w, IMM[0].xxxx >1074: DP4 TEMP[29].x, CONST[1][6], TEMP[7] >1075: DP4 TEMP[16].x, CONST[1][7], TEMP[7] >1076: MOV TEMP[29].y, TEMP[16].xxxx >1077: DP4 TEMP[16].x, CONST[1][9], TEMP[7] >1078: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >1079: SSG TEMP[19].xy, TEMP[29].xyyy >1080: MUL TEMP[19].xy, IMM[5].xxxx, TEMP[19].xyyy >1081: RCP TEMP[16].xy, TEMP[16].xxxx >1082: MUL TEMP[16].xy, TEMP[29].xyyy, TEMP[16].xyyy >1083: UCMP TEMP[16].xy, TEMP[17].xyyy, TEMP[19].xyyy, TEMP[16].xyyy >1084: FMA TEMP[16].xy, TEMP[16].xyyy, IMM[5].yzzz, IMM[3].xxxx >1085: MOV TEMP[16].xy, TEMP[16].xyyy >1086: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >1087: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].zzzz >1088: FSLT TEMP[16].x, TEMP[12].xxxx, IMM[2].wwww >1089: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >1090: INEG TEMP[16].x, TEMP[16].xxxx >1091: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >1092: UIF TEMP[16].xxxx :0 >1093: MOV TEMP[16].x, TEMP[15].yyyy >1094: ELSE :0 >1095: MOV TEMP[16].x, TEMP[15].xxxx >1096: ENDIF 
>1097: ABS TEMP[12].x, TEMP[12].xxxx >1098: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].xxxx >1099: FSNE TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >1100: UIF TEMP[17].xxxx :0 >1101: RCP TEMP[16].x, TEMP[16].xxxx >1102: MUL TEMP[16].x, TEMP[12].xxxx, TEMP[16].xxxx >1103: ELSE :0 >1104: SSG TEMP[12].x, TEMP[12].xxxx >1105: MUL TEMP[16].x, IMM[5].xxxx, TEMP[12].xxxx >1106: ENDIF >1107: MOV_SAT TEMP[12].x, TEMP[16].xxxx >1108: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >1109: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[13].xxxx, TEMP[2].xyzz >1110: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[22].xxxx, TEMP[7].xyzz >1111: FMA TEMP[12].xyz, TEMP[7].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >1112: MOV TEMP[7].xyz, TEMP[12].xyzx >1113: MOV TEMP[7].w, IMM[0].xxxx >1114: DP4 TEMP[13].x, CONST[1][6], TEMP[7] >1115: DP4 TEMP[16].x, CONST[1][7], TEMP[7] >1116: MOV TEMP[13].y, TEMP[16].xxxx >1117: DP4 TEMP[16].x, CONST[1][9], TEMP[7] >1118: FSEQ TEMP[17].xy, TEMP[16].xxxx, IMM[2].wwww >1119: SSG TEMP[19].xy, TEMP[13].xyyy >1120: MUL TEMP[19].xy, IMM[5].xxxx, TEMP[19].xyyy >1121: RCP TEMP[16].xy, TEMP[16].xxxx >1122: MUL TEMP[16].xy, TEMP[13].xyyy, TEMP[16].xyyy >1123: UCMP TEMP[16].xy, TEMP[17].xyyy, TEMP[19].xyyy, TEMP[16].xyyy >1124: FMA TEMP[16].xy, TEMP[16].xyyy, IMM[5].yzzz, IMM[3].xxxx >1125: MOV TEMP[16].xy, TEMP[16].xyyy >1126: TEX TEMP[16].x, TEMP[16], SAMP[3], 2D >1127: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].zzzz >1128: FSLT TEMP[16].x, TEMP[12].xxxx, IMM[2].wwww >1129: AND TEMP[16].x, TEMP[16].xxxx, IMM[6].xxxx >1130: INEG TEMP[16].x, TEMP[16].xxxx >1131: USNE TEMP[16].x, TEMP[16].xxxx, IMM[1].xxxx >1132: UIF TEMP[16].xxxx :0 >1133: MOV TEMP[16].x, TEMP[15].wwww >1134: ELSE :0 >1135: MOV TEMP[16].x, TEMP[15].zzzz >1136: ENDIF >1137: ABS TEMP[12].x, TEMP[12].xxxx >1138: ADD TEMP[12].x, -TEMP[16].xxxx, TEMP[12].xxxx >1139: FSNE TEMP[17].x, TEMP[16].xxxx, IMM[2].wwww >1140: UIF TEMP[17].xxxx :0 >1141: RCP TEMP[16].x, TEMP[16].xxxx >1142: MUL TEMP[16].x, TEMP[12].xxxx, TEMP[16].xxxx >1143: ELSE :0 >1144: SSG TEMP[12].x, TEMP[12].xxxx >1145: MUL TEMP[16].x, IMM[5].xxxx, TEMP[12].xxxx >1146: ENDIF >1147: MOV_SAT TEMP[12].x, TEMP[16].xxxx >1148: ADD TEMP[8].x, TEMP[12].xxxx, TEMP[8].xxxx >1149: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[24].xxxx, TEMP[2].xyzz >1150: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[23].xxxx, TEMP[7].xyzz >1151: MUL TEMP[23], TEMP[9].yyyy, IMM[12] >1152: MUL TEMP[29], TEMP[9].yyyy, IMM[13] >1153: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[23].xxxx, TEMP[10].xyzz >1154: MOV TEMP[7].xyz, TEMP[9].xyzx >1155: MOV TEMP[7].w, IMM[0].xxxx >1156: DP4 TEMP[13].x, CONST[1][6], TEMP[7] >1157: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1158: MOV TEMP[13].y, TEMP[12].xxxx >1159: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1160: FSEQ TEMP[16].xy, TEMP[12].xxxx, IMM[2].wwww >1161: SSG TEMP[17].xy, TEMP[13].xyyy >1162: MUL TEMP[17].xy, IMM[5].xxxx, TEMP[17].xyyy >1163: RCP TEMP[12].xy, TEMP[12].xxxx >1164: MUL TEMP[12].xy, TEMP[13].xyyy, TEMP[12].xyyy >1165: UCMP TEMP[12].xy, TEMP[16].xyyy, TEMP[17].xyyy, TEMP[12].xyyy >1166: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1167: MOV TEMP[12].xy, TEMP[12].xyyy >1168: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1169: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1170: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1171: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1172: INEG TEMP[12].x, TEMP[12].xxxx >1173: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1174: UIF TEMP[12].xxxx :0 >1175: MOV TEMP[12].x, TEMP[23].yyyy >1176: ELSE :0 >1177: MOV TEMP[12].x, TEMP[23].xxxx >1178: ENDIF >1179: ABS TEMP[9].x, 
TEMP[9].xxxx >1180: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1181: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1182: UIF TEMP[13].xxxx :0 >1183: RCP TEMP[12].x, TEMP[12].xxxx >1184: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1185: ELSE :0 >1186: SSG TEMP[9].x, TEMP[9].xxxx >1187: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1188: ENDIF >1189: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1190: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1191: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[26].xxxx, TEMP[2].xyzz >1192: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[5].xxxx, TEMP[7].xyzz >1193: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[23].zzzz, TEMP[10].xyzz >1194: MOV TEMP[7].xyz, TEMP[9].xyzx >1195: MOV TEMP[7].w, IMM[0].xxxx >1196: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1197: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1198: MOV TEMP[5].y, TEMP[12].xxxx >1199: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1200: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >1201: SSG TEMP[16].xy, TEMP[5].xyyy >1202: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >1203: RCP TEMP[12].xy, TEMP[12].xxxx >1204: MUL TEMP[12].xy, TEMP[5].xyyy, TEMP[12].xyyy >1205: UCMP TEMP[12].xy, TEMP[13].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >1206: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1207: MOV TEMP[12].xy, TEMP[12].xyyy >1208: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1209: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1210: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1211: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1212: INEG TEMP[12].x, TEMP[12].xxxx >1213: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1214: UIF TEMP[12].xxxx :0 >1215: MOV TEMP[12].x, TEMP[23].wwww >1216: ELSE :0 >1217: MOV TEMP[12].x, TEMP[23].zzzz >1218: ENDIF >1219: ABS TEMP[9].x, TEMP[9].xxxx >1220: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1221: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1222: UIF TEMP[13].xxxx :0 >1223: RCP TEMP[12].x, TEMP[12].xxxx >1224: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1225: ELSE :0 >1226: SSG TEMP[9].x, TEMP[9].xxxx >1227: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1228: ENDIF >1229: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1230: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1231: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[28].xxxx, TEMP[2].xyzz >1232: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[27].xxxx, TEMP[7].xyzz >1233: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[29].xxxx, TEMP[10].xyzz >1234: MOV TEMP[7].xyz, TEMP[9].xyzx >1235: MOV TEMP[7].w, IMM[0].xxxx >1236: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1237: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1238: MOV TEMP[5].y, TEMP[12].xxxx >1239: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1240: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >1241: SSG TEMP[16].xy, TEMP[5].xyyy >1242: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >1243: RCP TEMP[12].xy, TEMP[12].xxxx >1244: MUL TEMP[12].xy, TEMP[5].xyyy, TEMP[12].xyyy >1245: UCMP TEMP[12].xy, TEMP[13].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >1246: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1247: MOV TEMP[12].xy, TEMP[12].xyyy >1248: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1249: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1250: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1251: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1252: INEG TEMP[12].x, TEMP[12].xxxx >1253: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1254: UIF TEMP[12].xxxx :0 >1255: MOV TEMP[12].x, TEMP[29].yyyy >1256: ELSE :0 >1257: MOV TEMP[12].x, TEMP[29].xxxx >1258: ENDIF >1259: ABS TEMP[9].x, TEMP[9].xxxx >1260: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1261: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1262: UIF TEMP[13].xxxx 
:0 >1263: RCP TEMP[12].x, TEMP[12].xxxx >1264: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1265: ELSE :0 >1266: SSG TEMP[9].x, TEMP[9].xxxx >1267: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1268: ENDIF >1269: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1270: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1271: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[18].xxxx, TEMP[2].xyzz >1272: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[30].xxxx, TEMP[7].xyzz >1273: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[29].zzzz, TEMP[10].xyzz >1274: MOV TEMP[7].xyz, TEMP[9].xyzx >1275: MOV TEMP[7].w, IMM[0].xxxx >1276: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1277: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1278: MOV TEMP[5].y, TEMP[12].xxxx >1279: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1280: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >1281: SSG TEMP[16].xy, TEMP[5].xyyy >1282: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >1283: RCP TEMP[12].xy, TEMP[12].xxxx >1284: MUL TEMP[12].xy, TEMP[5].xyyy, TEMP[12].xyyy >1285: UCMP TEMP[12].xy, TEMP[13].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >1286: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1287: MOV TEMP[12].xy, TEMP[12].xyyy >1288: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1289: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1290: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1291: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1292: INEG TEMP[12].x, TEMP[12].xxxx >1293: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1294: UIF TEMP[12].xxxx :0 >1295: MOV TEMP[12].x, TEMP[29].wwww >1296: ELSE :0 >1297: MOV TEMP[12].x, TEMP[29].zzzz >1298: ENDIF >1299: ABS TEMP[9].x, TEMP[9].xxxx >1300: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1301: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1302: UIF TEMP[13].xxxx :0 >1303: RCP TEMP[12].x, TEMP[12].xxxx >1304: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1305: ELSE :0 >1306: SSG TEMP[9].x, TEMP[9].xxxx >1307: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1308: ENDIF >1309: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1310: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1311: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[32].xxxx, TEMP[2].xyzz >1312: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[31].xxxx, TEMP[7].xyzz >1313: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[21].xxxx, TEMP[10].xyzz >1314: MOV TEMP[7].xyz, TEMP[9].xyzx >1315: MOV TEMP[7].w, IMM[0].xxxx >1316: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1317: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1318: MOV TEMP[5].y, TEMP[12].xxxx >1319: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1320: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >1321: SSG TEMP[16].xy, TEMP[5].xyyy >1322: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >1323: RCP TEMP[12].xy, TEMP[12].xxxx >1324: MUL TEMP[12].xy, TEMP[5].xyyy, TEMP[12].xyyy >1325: UCMP TEMP[12].xy, TEMP[13].xyyy, TEMP[16].xyyy, TEMP[12].xyyy >1326: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1327: MOV TEMP[12].xy, TEMP[12].xyyy >1328: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1329: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1330: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1331: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1332: INEG TEMP[12].x, TEMP[12].xxxx >1333: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1334: UIF TEMP[12].xxxx :0 >1335: MOV TEMP[12].x, TEMP[21].yyyy >1336: ELSE :0 >1337: MOV TEMP[12].x, TEMP[21].xxxx >1338: ENDIF >1339: ABS TEMP[9].x, TEMP[9].xxxx >1340: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1341: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1342: UIF TEMP[13].xxxx :0 >1343: RCP TEMP[12].x, TEMP[12].xxxx >1344: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1345: ELSE :0 >1346: SSG TEMP[9].x, TEMP[9].xxxx 
>1347: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1348: ENDIF >1349: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1350: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1351: MOV TEMP[25].w, IMM[0].xxxx >1352: DP4 TEMP[7].x, CONST[1][6], TEMP[25] >1353: DP4 TEMP[9].x, CONST[1][7], TEMP[25] >1354: MOV TEMP[7].y, TEMP[9].xxxx >1355: DP4 TEMP[9].x, CONST[1][9], TEMP[25] >1356: FSEQ TEMP[12].xy, TEMP[9].xxxx, IMM[2].wwww >1357: SSG TEMP[13].xy, TEMP[7].xyyy >1358: MUL TEMP[13].xy, IMM[5].xxxx, TEMP[13].xyyy >1359: RCP TEMP[9].xy, TEMP[9].xxxx >1360: MUL TEMP[9].xy, TEMP[7].xyyy, TEMP[9].xyyy >1361: UCMP TEMP[9].xy, TEMP[12].xyyy, TEMP[13].xyyy, TEMP[9].xyyy >1362: FMA TEMP[9].xy, TEMP[9].xyyy, IMM[5].yzzz, IMM[3].xxxx >1363: MOV TEMP[9].xy, TEMP[9].xyyy >1364: TEX TEMP[9].x, TEMP[9], SAMP[3], 2D >1365: ADD TEMP[9].x, -TEMP[9].xxxx, TEMP[14].zzzz >1366: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1367: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1368: INEG TEMP[12].x, TEMP[12].xxxx >1369: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1370: UIF TEMP[12].xxxx :0 >1371: MOV TEMP[12].x, TEMP[21].wwww >1372: ELSE :0 >1373: MOV TEMP[12].x, TEMP[21].zzzz >1374: ENDIF >1375: ABS TEMP[9].x, TEMP[9].xxxx >1376: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1377: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1378: UIF TEMP[13].xxxx :0 >1379: RCP TEMP[12].x, TEMP[12].xxxx >1380: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1381: ELSE :0 >1382: SSG TEMP[9].x, TEMP[9].xxxx >1383: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1384: ENDIF >1385: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1386: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1387: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[34].xxxx, TEMP[2].xyzz >1388: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[33].xxxx, TEMP[7].xyzz >1389: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[11].xxxx, TEMP[10].xyzz >1390: MOV TEMP[7].xyz, TEMP[9].xyzx >1391: MOV TEMP[7].w, IMM[0].xxxx >1392: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1393: DP4 TEMP[12].x, CONST[1][7], TEMP[7] >1394: MOV TEMP[5].y, TEMP[12].xxxx >1395: DP4 TEMP[12].x, CONST[1][9], TEMP[7] >1396: FSEQ TEMP[13].xy, TEMP[12].xxxx, IMM[2].wwww >1397: SSG TEMP[14].xy, TEMP[5].xyyy >1398: MUL TEMP[14].xy, IMM[5].xxxx, TEMP[14].xyyy >1399: RCP TEMP[12].xy, TEMP[12].xxxx >1400: MUL TEMP[12].xy, TEMP[5].xyyy, TEMP[12].xyyy >1401: UCMP TEMP[12].xy, TEMP[13].xyyy, TEMP[14].xyyy, TEMP[12].xyyy >1402: FMA TEMP[12].xy, TEMP[12].xyyy, IMM[5].yzzz, IMM[3].xxxx >1403: MOV TEMP[12].xy, TEMP[12].xyyy >1404: TEX TEMP[12].x, TEMP[12], SAMP[3], 2D >1405: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].zzzz >1406: FSLT TEMP[12].x, TEMP[9].xxxx, IMM[2].wwww >1407: AND TEMP[12].x, TEMP[12].xxxx, IMM[6].xxxx >1408: INEG TEMP[12].x, TEMP[12].xxxx >1409: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx >1410: UIF TEMP[12].xxxx :0 >1411: MOV TEMP[12].x, TEMP[11].yyyy >1412: ELSE :0 >1413: MOV TEMP[12].x, TEMP[11].xxxx >1414: ENDIF >1415: ABS TEMP[9].x, TEMP[9].xxxx >1416: ADD TEMP[9].x, -TEMP[12].xxxx, TEMP[9].xxxx >1417: FSNE TEMP[13].x, TEMP[12].xxxx, IMM[2].wwww >1418: UIF TEMP[13].xxxx :0 >1419: RCP TEMP[12].x, TEMP[12].xxxx >1420: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx >1421: ELSE :0 >1422: SSG TEMP[9].x, TEMP[9].xxxx >1423: MUL TEMP[12].x, IMM[5].xxxx, TEMP[9].xxxx >1424: ENDIF >1425: MOV_SAT TEMP[9].x, TEMP[12].xxxx >1426: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx >1427: FMA TEMP[7].xyz, TEMP[1].xyzz, TEMP[35].xxxx, TEMP[2].xyzz >1428: FMA TEMP[7].xyz, TEMP[3].xyzz, TEMP[6].xxxx, TEMP[7].xyzz >1429: FMA TEMP[6].xyz, TEMP[7].xyzz, TEMP[11].zzzz, TEMP[10].xyzz >1430: MOV TEMP[7].xyz, TEMP[6].xyzx 
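Each of the larger repeated blocks above (1351-1386 is one complete instance) projects a candidate position with rows 6, 7 and 9 of CONST[1], remaps the divided result into texture coordinates, fetches the depth map bound as SAMP[3] and turns the depth difference into a saturated contribution to TEMP[8].x. A C sketch of one such tap, reusing guarded_div_saturate() from the sketch earlier; sample_depth is a hypothetical stand-in for the TEX instruction, the 0.5/-0.5 scale and 0.5 bias stand in for IMM[5].yzzz and IMM[3].xxxx (the IR later in this dump uses exactly those constants), and the w == 0 fallback of the dump is folded into a comment:

    static float dp4(const float a[4], const float b[4])
    {
        return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
    }

    /* One shadow tap, mirroring TGSI 1351-1386.  row_x/row_y/row_w are
     * CONST[1][6], CONST[1][7] and CONST[1][9]; p is the candidate position
     * with w = 1; ref_z is the value compared against the fetched depth
     * (TEMP[14].zzzz in that instance); knee_pos/knee_neg are the two
     * per-tap widths chosen by the FSLT/UIF pair (TEMP[21].zzzz/.wwww). */
    static float shadow_tap(const float row_x[4], const float row_y[4],
                            const float row_w[4], const float p[4],
                            float ref_z, float knee_pos, float knee_neg,
                            float big_val,
                            float (*sample_depth)(float u, float v))
    {
        float w = dp4(row_w, p);
        /* the dump guards w == 0 with an FSEQ/SSG/UCMP group; omitted here */
        float u = (dp4(row_x, p) / w) *  0.5f + 0.5f;   /* FMA IMM[5].y, IMM[3].x */
        float v = (dp4(row_y, p) / w) * -0.5f + 0.5f;   /* FMA IMM[5].z, IMM[3].x */
        float d = ref_z - sample_depth(u, v);           /* ADD with -TEX result   */
        float knee = (d < 0.0f) ? knee_neg : knee_pos;  /* FSLT, UIF/ELSE         */
        return guarded_div_saturate(fabsf(d) - knee, knee, big_val);
    }
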
>1431: MOV TEMP[7].w, IMM[0].xxxx >1432: DP4 TEMP[5].x, CONST[1][6], TEMP[7] >1433: DP4 TEMP[9].x, CONST[1][7], TEMP[7] >1434: MOV TEMP[5].y, TEMP[9].xxxx >1435: DP4 TEMP[7].x, CONST[1][9], TEMP[7] >1436: FSEQ TEMP[9].xy, TEMP[7].xxxx, IMM[2].wwww >1437: SSG TEMP[12].xy, TEMP[5].xyyy >1438: MUL TEMP[12].xy, IMM[5].xxxx, TEMP[12].xyyy >1439: RCP TEMP[7].xy, TEMP[7].xxxx >1440: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[7].xyyy >1441: UCMP TEMP[5].xy, TEMP[9].xyyy, TEMP[12].xyyy, TEMP[5].xyyy >1442: FMA TEMP[5].xy, TEMP[5].xyyy, IMM[5].yzzz, IMM[3].xxxx >1443: MOV TEMP[5].xy, TEMP[5].xyyy >1444: TEX TEMP[5].x, TEMP[5], SAMP[3], 2D >1445: ADD TEMP[5].x, -TEMP[5].xxxx, TEMP[6].zzzz >1446: FSLT TEMP[6].x, TEMP[5].xxxx, IMM[2].wwww >1447: AND TEMP[6].x, TEMP[6].xxxx, IMM[6].xxxx >1448: INEG TEMP[6].x, TEMP[6].xxxx >1449: USNE TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx >1450: UIF TEMP[6].xxxx :0 >1451: MOV TEMP[6].x, TEMP[11].wwww >1452: ELSE :0 >1453: MOV TEMP[6].x, TEMP[11].zzzz >1454: ENDIF >1455: ABS TEMP[5].x, TEMP[5].xxxx >1456: ADD TEMP[5].x, -TEMP[6].xxxx, TEMP[5].xxxx >1457: FSNE TEMP[7].x, TEMP[6].xxxx, IMM[2].wwww >1458: UIF TEMP[7].xxxx :0 >1459: RCP TEMP[6].x, TEMP[6].xxxx >1460: MUL TEMP[6].x, TEMP[5].xxxx, TEMP[6].xxxx >1461: ELSE :0 >1462: SSG TEMP[5].x, TEMP[5].xxxx >1463: MUL TEMP[6].x, IMM[5].xxxx, TEMP[5].xxxx >1464: ENDIF >1465: MOV_SAT TEMP[5].x, TEMP[6].xxxx >1466: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx >1467: FMA TEMP[6].xyz, TEMP[1].xyzz, TEMP[37].xxxx, TEMP[2].xyzz >1468: FMA TEMP[1].xyz, TEMP[1].xyzz, TEMP[38].xxxx, TEMP[2].xyzz >1469: FMA TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xxxx, TEMP[1].xyzz >1470: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[36].xxxx, TEMP[6].xyzz >1471: FMA TEMP[4].xyz, TEMP[2].xyzz, TEMP[15].xxxx, TEMP[10].xyzz >1472: MOV TEMP[2].xyz, TEMP[4].xyzx >1473: FMA TEMP[6].xyz, TEMP[1].xyzz, TEMP[15].zzzz, TEMP[10].xyzz >1474: MOV TEMP[1].xyz, TEMP[6].xyzx >1475: MOV TEMP[2].w, IMM[0].xxxx >1476: DP4 TEMP[3].x, CONST[1][6], TEMP[2] >1477: DP4 TEMP[7].x, CONST[1][7], TEMP[2] >1478: MOV TEMP[3].y, TEMP[7].xxxx >1479: DP4 TEMP[7].x, CONST[1][9], TEMP[2] >1480: FSEQ TEMP[8].xy, TEMP[7].xxxx, IMM[2].wwww >1481: SSG TEMP[9].xy, TEMP[3].xyyy >1482: MUL TEMP[9].xy, IMM[5].xxxx, TEMP[9].xyyy >1483: RCP TEMP[7].xy, TEMP[7].xxxx >1484: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >1485: UCMP TEMP[3].xy, TEMP[8].xyyy, TEMP[9].xyyy, TEMP[3].xyyy >1486: FMA TEMP[3].xy, TEMP[3].xyyy, IMM[5].yzzz, IMM[3].xxxx >1487: MOV TEMP[3].xy, TEMP[3].xyyy >1488: TEX TEMP[3].x, TEMP[3], SAMP[3], 2D >1489: ADD TEMP[3].x, -TEMP[3].xxxx, TEMP[4].zzzz >1490: FSLT TEMP[4].x, TEMP[3].xxxx, IMM[2].wwww >1491: AND TEMP[4].x, TEMP[4].xxxx, IMM[6].xxxx >1492: INEG TEMP[4].x, TEMP[4].xxxx >1493: USNE TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx >1494: UIF TEMP[4].xxxx :0 >1495: MOV TEMP[4].x, TEMP[15].yyyy >1496: ELSE :0 >1497: MOV TEMP[4].x, TEMP[15].xxxx >1498: ENDIF >1499: ABS TEMP[3].x, TEMP[3].xxxx >1500: ADD TEMP[3].x, -TEMP[4].xxxx, TEMP[3].xxxx >1501: FSNE TEMP[7].x, TEMP[4].xxxx, IMM[2].wwww >1502: UIF TEMP[7].xxxx :0 >1503: RCP TEMP[4].x, TEMP[4].xxxx >1504: MUL TEMP[4].x, TEMP[3].xxxx, TEMP[4].xxxx >1505: ELSE :0 >1506: SSG TEMP[3].x, TEMP[3].xxxx >1507: MUL TEMP[4].x, IMM[5].xxxx, TEMP[3].xxxx >1508: ENDIF >1509: MOV_SAT TEMP[3].x, TEMP[4].xxxx >1510: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx >1511: MOV TEMP[1].w, IMM[0].xxxx >1512: DP4 TEMP[2].x, CONST[1][6], TEMP[1] >1513: DP4 TEMP[4].x, CONST[1][7], TEMP[1] >1514: MOV TEMP[2].y, TEMP[4].xxxx >1515: DP4 TEMP[1].x, CONST[1][9], TEMP[1] >1516: FSEQ TEMP[4].xy, 
TEMP[1].xxxx, IMM[2].wwww >1517: SSG TEMP[5].xy, TEMP[2].xyyy >1518: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >1519: RCP TEMP[1].xy, TEMP[1].xxxx >1520: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy >1521: UCMP TEMP[1].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[1].xyyy >1522: FMA TEMP[1].xy, TEMP[1].xyyy, IMM[5].yzzz, IMM[3].xxxx >1523: MOV TEMP[1].xy, TEMP[1].xyyy >1524: TEX TEMP[1].x, TEMP[1], SAMP[3], 2D >1525: ADD TEMP[1].x, -TEMP[1].xxxx, TEMP[6].zzzz >1526: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[2].wwww >1527: AND TEMP[2].x, TEMP[2].xxxx, IMM[6].xxxx >1528: INEG TEMP[2].x, TEMP[2].xxxx >1529: USNE TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx >1530: UIF TEMP[2].xxxx :0 >1531: MOV TEMP[2].x, TEMP[15].wwww >1532: ELSE :0 >1533: MOV TEMP[2].x, TEMP[15].zzzz >1534: ENDIF >1535: ABS TEMP[1].x, TEMP[1].xxxx >1536: ADD TEMP[1].x, -TEMP[2].xxxx, TEMP[1].xxxx >1537: FSNE TEMP[4].x, TEMP[2].xxxx, IMM[2].wwww >1538: UIF TEMP[4].xxxx :0 >1539: RCP TEMP[2].x, TEMP[2].xxxx >1540: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx >1541: ELSE :0 >1542: SSG TEMP[1].x, TEMP[1].xxxx >1543: MUL TEMP[2].x, IMM[5].xxxx, TEMP[1].xxxx >1544: ENDIF >1545: MOV_SAT TEMP[1].x, TEMP[2].xxxx >1546: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx >1547: MUL TEMP[1].x, TEMP[1].xxxx, IMM[16].zzzz >1548: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx >1549: MIN TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx >1550: MOV TEMP[0].y, TEMP[1].xxxx >1551: MOV TEMP[0].zw, IMM[17].yyxy >1552: MOV OUT[0], TEMP[0] >1553: END >radeonsi: Compiling shader 89 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 144) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 148) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 152) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 156) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 164) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 168) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 180) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 
196) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 3 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 > %59 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 7 > %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 > %62 = extractelement <8 x i32> %58, i32 7 > %63 = extractelement <4 x i32> %61, i32 0 > %64 = and i32 %63, %62 > %65 = insertelement <4 x i32> %61, i32 %64, i32 0 > %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 > %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 11 > %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 > %71 = extractelement <8 x i32> %67, i32 7 > %72 = extractelement <4 x i32> %70, i32 0 > %73 = and i32 %72, %71 > %74 = insertelement <4 x i32> %70, i32 %73, i32 0 > %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 > %77 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %78 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %77, i64 0, i64 15 > %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 > %80 = extractelement <8 x i32> %76, i32 7 > %81 = extractelement <4 x i32> %79, i32 0 > %82 = and i32 %81, %80 > %83 = insertelement <4 x i32> %79, i32 %82, i32 0 > %84 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %93 = fmul float %88, %88 > %94 = fmul float %89, %89 > %95 = fadd float %94, %93 > %96 = fmul float %90, %90 > %97 = fadd float %95, %96 > %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) > %99 = fmul float %98, %88 > %100 = fmul float %98, %89 > %101 = fmul float %98, %90 > %102 = bitcast float %84 to i32 > %103 = bitcast float %85 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = 
insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = call float @llvm.fma.f32(float %107, float 2.000000e+00, float -1.000000e+00) > %111 = call float @llvm.fma.f32(float %108, float 2.000000e+00, float -1.000000e+00) > %112 = call float @llvm.fma.f32(float %109, float 2.000000e+00, float -1.000000e+00) > %113 = fmul float %110, %110 > %114 = fmul float %111, %111 > %115 = fadd float %114, %113 > %116 = fmul float %112, %112 > %117 = fadd float %115, %116 > %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) > %119 = fmul float %118, %110 > %120 = fmul float %118, %111 > %121 = fmul float %118, %112 > %122 = fmul float %120, %40 > %123 = fmul float %120, %41 > %124 = fmul float %120, %42 > %125 = call float @llvm.fma.f32(float %119, float %37, float %122) > %126 = call float @llvm.fma.f32(float %119, float %38, float %123) > %127 = call float @llvm.fma.f32(float %119, float %39, float %124) > %128 = call float @llvm.fma.f32(float %121, float %43, float %125) > %129 = call float @llvm.fma.f32(float %121, float %44, float %126) > %130 = call float @llvm.fma.f32(float %121, float %45, float %127) > %131 = fmul float %128, %128 > %132 = fmul float %129, %129 > %133 = fadd float %132, %131 > %134 = fmul float %130, %130 > %135 = fadd float %133, %134 > %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) > %137 = fmul float %136, %128 > %138 = fmul float %136, %129 > %139 = fmul float %136, %130 > %140 = fmul float %99, %137 > %141 = fmul float %100, %138 > %142 = fadd float %141, %140 > %143 = fmul float %101, %139 > %144 = fadd float %142, %143 > %145 = fadd float %144, %144 > %146 = fsub float -0.000000e+00, %145 > %147 = call float @llvm.fma.f32(float %137, float %146, float %99) > %148 = fsub float -0.000000e+00, %145 > %149 = call float @llvm.fma.f32(float %138, float %148, float %100) > %150 = fsub float -0.000000e+00, %145 > %151 = call float @llvm.fma.f32(float %139, float %150, float %101) > %152 = call float @llvm.fma.f32(float %147, float 2.000000e+00, float %137) > %153 = call float @llvm.fma.f32(float %149, float 2.000000e+00, float %138) > %154 = call float @llvm.fma.f32(float %151, float 2.000000e+00, float %139) > %155 = fmul float %152, %152 > %156 = fmul float %153, %153 > %157 = fadd float %156, %155 > %158 = fmul float %154, %154 > %159 = fadd float %157, %158 > %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) > %161 = fmul float %160, %152 > %162 = fmul float %160, %153 > %163 = fmul float %160, %154 > %164 = fmul float %138, %163 > %165 = fmul float %139, %161 > %166 = fmul float %137, %162 > %167 = fsub float -0.000000e+00, %164 > %168 = call float @llvm.fma.f32(float %162, float %139, float %167) > %169 = fsub float -0.000000e+00, %165 > %170 = call float @llvm.fma.f32(float %163, float %137, float %169) > %171 = fsub float -0.000000e+00, %166 > %172 = call float @llvm.fma.f32(float %161, float %138, float %171) > %173 = fmul float %168, %168 > %174 = fmul float %170, %170 > %175 = fadd float %174, %173 > %176 = fmul float %172, %172 > %177 = fadd float %175, %176 > %178 = call float @llvm.AMDGPU.rsq.clamped.f32(float %177) > %179 = fmul float %178, %168 > %180 = fmul float %178, %170 > %181 = fmul float %178, %172 > %182 = bitcast float %86 to i32 > 
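The IR above repeats one small pattern many times: a vector is optionally unpacked from a [0,1] encoding with fma(x, 2.0, -1.0) and then normalized through llvm.AMDGPU.rsq.clamped (sum of squares, reciprocal square root, per-component scale); see %93-%101 for the interpolated input and %107-%121 for the value fetched by llvm.SI.image.sample. A plain C sketch of that unpack-and-normalize step (without the clamping the intrinsic performs):

    #include <math.h>

    /* Unpack a [0,1]-encoded 3-vector and normalize it, as in %110-%121. */
    static void unpack_and_normalize(const float in[3], float out[3])
    {
        float v0 = in[0] * 2.0f - 1.0f;   /* llvm.fma.f32(x, 2.0, -1.0) */
        float v1 = in[1] * 2.0f - 1.0f;
        float v2 = in[2] * 2.0f - 1.0f;
        float rsq = 1.0f / sqrtf(v0*v0 + v1*v1 + v2*v2);  /* rsq.clamped */
        out[0] = v0 * rsq;
        out[1] = v1 * rsq;
        out[2] = v2 * rsq;
    }
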
%183 = bitcast float %87 to i32 > %184 = insertelement <2 x i32> undef, i32 %182, i32 0 > %185 = insertelement <2 x i32> %184, i32 %183, i32 1 > %186 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %185, <8 x i32> %58, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %187 = extractelement <4 x float> %186, i32 0 > %188 = extractelement <4 x float> %186, i32 1 > %189 = call float @llvm.fma.f32(float %187, float 2.000000e+00, float -1.000000e+00) > %190 = call float @llvm.fma.f32(float %188, float 2.000000e+00, float -1.000000e+00) > %191 = fmul float %189, %189 > %192 = fmul float %190, %190 > %193 = fadd float %191, %192 > %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) > %195 = fmul float %194, %189 > %196 = fmul float %194, %190 > %197 = fmul float %195, 0xBFB99999A0000000 > %198 = fmul float %196, 0x3FB99999A0000000 > %199 = fadd float %197, %198 > %200 = fmul float %195, 0x3FD147AE20000000 > %201 = fmul float %196, 0x3F847AE140000000 > %202 = fadd float %200, %201 > %203 = fmul float %162, %181 > %204 = fmul float %163, %179 > %205 = fmul float %161, %180 > %206 = fsub float -0.000000e+00, %203 > %207 = call float @llvm.fma.f32(float %180, float %163, float %206) > %208 = fsub float -0.000000e+00, %204 > %209 = call float @llvm.fma.f32(float %181, float %161, float %208) > %210 = fsub float -0.000000e+00, %205 > %211 = call float @llvm.fma.f32(float %179, float %162, float %210) > %212 = call float @llvm.fma.f32(float %207, float %199, float %161) > %213 = call float @llvm.fma.f32(float %209, float %199, float %162) > %214 = call float @llvm.fma.f32(float %211, float %199, float %163) > %215 = fmul float %196, 0x3FB99999A0000000 > %216 = fmul float %195, 0x3FB99999A0000000 > %217 = fadd float %215, %216 > %218 = call float @llvm.fma.f32(float %179, float %217, float %212) > %219 = call float @llvm.fma.f32(float %180, float %217, float %213) > %220 = call float @llvm.fma.f32(float %181, float %217, float %214) > %221 = bitcast float %84 to i32 > %222 = bitcast float %85 to i32 > %223 = insertelement <2 x i32> undef, i32 %221, i32 0 > %224 = insertelement <2 x i32> %223, i32 %222, i32 1 > %225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %224, <8 x i32> %67, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %226 = extractelement <4 x float> %225, i32 0 > %227 = fmul float %226, 0x3F70624DE0000000 > %228 = fmul float %226, 0x3F847AE140000000 > %229 = call float @llvm.maxnum.f32(float %227, float 1.000000e+00) > %230 = call float @llvm.maxnum.f32(float %228, float 1.000000e+00) > %231 = fmul float %229, 5.000000e-01 > %232 = fmul float %229, 2.500000e-01 > %233 = fmul float %229, 0x3FF4CCCCC0000000 > %234 = fmul float %229, 0x3FE4CCCCC0000000 > %235 = fmul float %226, %46 > %236 = fadd float %235, %47 > %237 = fmul float %236, %91 > %238 = fmul float %236, %92 > %239 = call float @llvm.fma.f32(float %218, float %231, float %237) > %240 = call float @llvm.fma.f32(float %219, float %231, float %238) > %241 = call float @llvm.fma.f32(float %220, float %231, float %226) > %242 = fmul float %25, %239 > %243 = fmul float %26, %240 > %244 = fadd float %242, %243 > %245 = fmul float %27, %241 > %246 = fadd float %244, %245 > %247 = fadd float %246, %28 > %248 = fmul float %29, %239 > %249 = fmul float %30, %240 > %250 = fadd float %248, %249 > %251 = fmul float %31, %241 > %252 = fadd float %250, %251 > %253 = fadd float %252, %32 > %254 = fmul float %33, %239 > %255 = fmul float %34, %240 > %256 = fadd float %254, 
%255 > %257 = fmul float %35, %241 > %258 = fadd float %256, %257 > %259 = fadd float %258, %36 > %260 = fcmp oeq float %259, 0.000000e+00 > %261 = fcmp oeq float %259, 0.000000e+00 > %262 = fcmp ogt float %247, 0.000000e+00 > %263 = select i1 %262, float 1.000000e+00, float %247 > %264 = fcmp oge float %263, 0.000000e+00 > %265 = fcmp ogt float %253, 0.000000e+00 > %266 = select i1 %265, float 1.000000e+00, float %253 > %267 = fcmp oge float %266, 0.000000e+00 > %.op = fmul float %263, 0x4600000000000000 > %268 = select i1 %264, float %.op, float 0xC600000000000000 > %.op440 = fmul float %266, 0x4600000000000000 > %269 = select i1 %267, float %.op440, float 0xC600000000000000 > %270 = fdiv float 1.000000e+00, %259 > %271 = fmul float %247, %270 > %272 = fmul float %253, %270 > %273 = select i1 %260, float %268, float %271 > %274 = select i1 %261, float %269, float %272 > %275 = call float @llvm.fma.f32(float %273, float 5.000000e-01, float 5.000000e-01) > %276 = call float @llvm.fma.f32(float %274, float -5.000000e-01, float 5.000000e-01) > %277 = bitcast float %275 to i32 > %278 = bitcast float %276 to i32 > %279 = insertelement <2 x i32> undef, i32 %277, i32 0 > %280 = insertelement <2 x i32> %279, i32 %278, i32 1 > %281 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %280, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %282 = extractelement <4 x float> %281, i32 0 > %283 = fsub float %241, %282 > %284 = fcmp olt float %283, 0.000000e+00 > %. = select i1 %284, float %232, float %231 > %285 = fmul float %., 5.000000e-01 > %286 = fcmp ogt float %163, 0.000000e+00 > %287 = fcmp ogt float %283, 0.000000e+00 > %288 = and i1 %286, %287 > %temp52.0 = select i1 %288, float %285, float %. > %289 = call float @llvm.fabs.f32(float %283) > %290 = fsub float %289, %temp52.0 > %291 = fcmp une float %temp52.0, 0.000000e+00 > br i1 %291, label %IF176, label %ELSE177 > >IF176: ; preds = %main_body > %292 = fdiv float 1.000000e+00, %temp52.0 > %293 = fmul float %290, %292 > br label %ENDIF175 > >ELSE177: ; preds = %main_body > %294 = fcmp ogt float %290, 0.000000e+00 > %295 = select i1 %294, float 1.000000e+00, float %290 > %296 = fcmp oge float %295, 0.000000e+00 > %.op441 = fmul float %295, 0x4600000000000000 > %297 = select i1 %296, float %.op441, float 0xC600000000000000 > br label %ENDIF175 > >ENDIF175: ; preds = %ELSE177, %IF176 > %temp48.1 = phi float [ %293, %IF176 ], [ %297, %ELSE177 ] > %298 = call float @llvm.AMDGPU.clamp.(float %temp48.1, float 0.000000e+00, float 1.000000e+00) > %299 = fmul float %196, 0x3FD3D70A40000000 > %300 = fmul float %195, 0xBFD3D70A40000000 > %301 = fmul float %196, 0xBFD3D70A40000000 > %302 = fmul float %195, 0x3FD3D70A40000000 > %303 = call float @llvm.fma.f32(float %207, float %300, float %161) > %304 = call float @llvm.fma.f32(float %209, float %300, float %162) > %305 = call float @llvm.fma.f32(float %211, float %300, float %163) > %306 = call float @llvm.fma.f32(float %179, float %299, float %303) > %307 = call float @llvm.fma.f32(float %180, float %299, float %304) > %308 = call float @llvm.fma.f32(float %181, float %299, float %305) > %309 = call float @llvm.fma.f32(float %306, float %233, float %237) > %310 = call float @llvm.fma.f32(float %307, float %233, float %238) > %311 = call float @llvm.fma.f32(float %308, float %233, float %226) > %312 = fmul float %25, %309 > %313 = fmul float %26, %310 > %314 = fadd float %312, %313 > %315 = fmul float %27, %311 > %316 = fadd float %314, %315 > %317 = fadd float 
%316, %28 > %318 = fmul float %29, %309 > %319 = fmul float %30, %310 > %320 = fadd float %318, %319 > %321 = fmul float %31, %311 > %322 = fadd float %320, %321 > %323 = fadd float %322, %32 > %324 = fmul float %33, %309 > %325 = fmul float %34, %310 > %326 = fadd float %324, %325 > %327 = fmul float %35, %311 > %328 = fadd float %326, %327 > %329 = fadd float %328, %36 > %330 = fcmp oeq float %329, 0.000000e+00 > %331 = fcmp oeq float %329, 0.000000e+00 > %332 = fcmp ogt float %317, 0.000000e+00 > %333 = select i1 %332, float 1.000000e+00, float %317 > %334 = fcmp oge float %333, 0.000000e+00 > %335 = fcmp ogt float %323, 0.000000e+00 > %336 = select i1 %335, float 1.000000e+00, float %323 > %337 = fcmp oge float %336, 0.000000e+00 > %.op442 = fmul float %333, 0x4600000000000000 > %338 = select i1 %334, float %.op442, float 0xC600000000000000 > %.op443 = fmul float %336, 0x4600000000000000 > %339 = select i1 %337, float %.op443, float 0xC600000000000000 > %340 = fdiv float 1.000000e+00, %329 > %341 = fmul float %317, %340 > %342 = fmul float %323, %340 > %343 = select i1 %330, float %338, float %341 > %344 = select i1 %331, float %339, float %342 > %345 = call float @llvm.fma.f32(float %343, float 5.000000e-01, float 5.000000e-01) > %346 = call float @llvm.fma.f32(float %344, float -5.000000e-01, float 5.000000e-01) > %347 = bitcast float %345 to i32 > %348 = bitcast float %346 to i32 > %349 = insertelement <2 x i32> undef, i32 %347, i32 0 > %350 = insertelement <2 x i32> %349, i32 %348, i32 1 > %351 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %350, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %352 = extractelement <4 x float> %351, i32 0 > %353 = fsub float %311, %352 > %354 = fcmp olt float %353, 0.000000e+00 > %.409 = select i1 %354, float %234, float %233 > %355 = fmul float %.409, 5.000000e-01 > %356 = fcmp ogt float %353, 0.000000e+00 > %357 = and i1 %286, %356 > %temp72.0 = select i1 %357, float %355, float %.409 > %358 = call float @llvm.fabs.f32(float %353) > %359 = fsub float %358, %temp72.0 > %360 = fcmp une float %temp72.0, 0.000000e+00 > br i1 %360, label %IF185, label %ELSE186 > >IF185: ; preds = %ENDIF175 > %361 = fdiv float 1.000000e+00, %temp72.0 > %362 = fmul float %359, %361 > br label %ENDIF184 > >ELSE186: ; preds = %ENDIF175 > %363 = fcmp ogt float %359, 0.000000e+00 > %364 = select i1 %363, float 1.000000e+00, float %359 > %365 = fcmp oge float %364, 0.000000e+00 > %.op444 = fmul float %364, 0x4600000000000000 > %366 = select i1 %365, float %.op444, float 0xC600000000000000 > br label %ENDIF184 > >ENDIF184: ; preds = %ELSE186, %IF185 > %temp68.1 = phi float [ %362, %IF185 ], [ %366, %ELSE186 ] > %367 = call float @llvm.AMDGPU.clamp.(float %temp68.1, float 0.000000e+00, float 1.000000e+00) > %368 = fadd float %367, %298 > %369 = fmul float %196, 0xBFD147AE20000000 > %370 = fmul float %195, 0x3F847AE140000000 > %371 = fadd float %369, %370 > %372 = call float @llvm.fma.f32(float %207, float %202, float %161) > %373 = call float @llvm.fma.f32(float %209, float %202, float %162) > %374 = call float @llvm.fma.f32(float %211, float %202, float %163) > %375 = call float @llvm.fma.f32(float %179, float %371, float %372) > %376 = call float @llvm.fma.f32(float %180, float %371, float %373) > %377 = call float @llvm.fma.f32(float %181, float %371, float %374) > %378 = fmul float %229, 0x4002666660000000 > %379 = fmul float %229, 0x3FF2666660000000 > %380 = fmul float %229, 0x40099999A0000000 > %381 = fmul float %229, 
0x3FF99999A0000000 > %382 = call float @llvm.fma.f32(float %375, float %378, float %237) > %383 = call float @llvm.fma.f32(float %376, float %378, float %238) > %384 = call float @llvm.fma.f32(float %377, float %378, float %226) > %385 = fmul float %25, %382 > %386 = fmul float %26, %383 > %387 = fadd float %385, %386 > %388 = fmul float %27, %384 > %389 = fadd float %387, %388 > %390 = fadd float %389, %28 > %391 = fmul float %29, %382 > %392 = fmul float %30, %383 > %393 = fadd float %391, %392 > %394 = fmul float %31, %384 > %395 = fadd float %393, %394 > %396 = fadd float %395, %32 > %397 = fmul float %33, %382 > %398 = fmul float %34, %383 > %399 = fadd float %397, %398 > %400 = fmul float %35, %384 > %401 = fadd float %399, %400 > %402 = fadd float %401, %36 > %403 = fcmp oeq float %402, 0.000000e+00 > %404 = fcmp oeq float %402, 0.000000e+00 > %405 = fcmp ogt float %390, 0.000000e+00 > %406 = select i1 %405, float 1.000000e+00, float %390 > %407 = fcmp oge float %406, 0.000000e+00 > %408 = fcmp ogt float %396, 0.000000e+00 > %409 = select i1 %408, float 1.000000e+00, float %396 > %410 = fcmp oge float %409, 0.000000e+00 > %.op445 = fmul float %406, 0x4600000000000000 > %411 = select i1 %407, float %.op445, float 0xC600000000000000 > %.op446 = fmul float %409, 0x4600000000000000 > %412 = select i1 %410, float %.op446, float 0xC600000000000000 > %413 = fdiv float 1.000000e+00, %402 > %414 = fmul float %390, %413 > %415 = fmul float %396, %413 > %416 = select i1 %403, float %411, float %414 > %417 = select i1 %404, float %412, float %415 > %418 = call float @llvm.fma.f32(float %416, float 5.000000e-01, float 5.000000e-01) > %419 = call float @llvm.fma.f32(float %417, float -5.000000e-01, float 5.000000e-01) > %420 = bitcast float %418 to i32 > %421 = bitcast float %419 to i32 > %422 = insertelement <2 x i32> undef, i32 %420, i32 0 > %423 = insertelement <2 x i32> %422, i32 %421, i32 1 > %424 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %423, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %425 = extractelement <4 x float> %424, i32 0 > %426 = fsub float %384, %425 > %427 = fcmp olt float %426, 0.000000e+00 > %.410 = select i1 %427, float %379, float %378 > %428 = fmul float %.410, 5.000000e-01 > %429 = fcmp ogt float %426, 0.000000e+00 > %430 = and i1 %286, %429 > %temp72.1 = select i1 %430, float %428, float %.410 > %431 = call float @llvm.fabs.f32(float %426) > %432 = fsub float %431, %temp72.1 > %433 = fcmp une float %temp72.1, 0.000000e+00 > br i1 %433, label %IF194, label %ELSE195 > >IF194: ; preds = %ENDIF184 > %434 = fdiv float 1.000000e+00, %temp72.1 > %435 = fmul float %432, %434 > br label %ENDIF193 > >ELSE195: ; preds = %ENDIF184 > %436 = fcmp ogt float %432, 0.000000e+00 > %437 = select i1 %436, float 1.000000e+00, float %432 > %438 = fcmp oge float %437, 0.000000e+00 > %.op447 = fmul float %437, 0x4600000000000000 > %439 = select i1 %438, float %.op447, float 0xC600000000000000 > br label %ENDIF193 > >ENDIF193: ; preds = %ELSE195, %IF194 > %temp68.3 = phi float [ %435, %IF194 ], [ %439, %ELSE195 ] > %440 = call float @llvm.AMDGPU.clamp.(float %temp68.3, float 0.000000e+00, float 1.000000e+00) > %441 = fadd float %440, %368 > %442 = fmul float %196, 0xBF847AE140000000 > %443 = fmul float %195, 0xBFD3D70A40000000 > %444 = fadd float %442, %443 > %445 = fmul float %196, 0x3FD3D70A40000000 > %446 = fmul float %196, 0xBFD3D70A40000000 > %447 = fmul float %195, 0x3F847AE140000000 > %448 = fmul float %196, 0xBFD3D70A40000000 > 
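The long fcmp/select chains above (e.g. %403-%417, and the near-identical groups before and after it) are the IR form of the FSEQ/SSG/RCP/MUL/UCMP groups in the TGSI listings: an x/w division that falls back to sign(x) * 2^97 when w is zero. 0x4600000000000000, read back as a floating-point value, is 2^97, a huge but finite stand-in for infinity. A C sketch of what one such chain computes for a single lane:

    /* Divide x by w, but when w == 0 return sign(x) * 2^97 instead,
     * matching the select chain at %403-%417 (0x1p97f is the value of
     * 0x4600000000000000 interpreted as a floating-point constant). */
    static float div_or_big(float x, float w)
    {
        const float BIG = 0x1p97f;
        float fallback = (x > 0.0f)  ? BIG
                       : (x >= 0.0f) ? x * BIG   /* x == +/-0 stays +/-0 */
                                     : -BIG;
        return (w == 0.0f) ? fallback : x / w;
    }
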
%449 = fadd float %447, %448 > %450 = call float @llvm.fma.f32(float %207, float %449, float %161) > %451 = call float @llvm.fma.f32(float %209, float %449, float %162) > %452 = call float @llvm.fma.f32(float %211, float %449, float %163) > %453 = call float @llvm.fma.f32(float %179, float %444, float %450) > %454 = call float @llvm.fma.f32(float %180, float %444, float %451) > %455 = call float @llvm.fma.f32(float %181, float %444, float %452) > %456 = call float @llvm.fma.f32(float %453, float %380, float %237) > %457 = call float @llvm.fma.f32(float %454, float %380, float %238) > %458 = call float @llvm.fma.f32(float %455, float %380, float %226) > %459 = fmul float %25, %456 > %460 = fmul float %26, %457 > %461 = fadd float %459, %460 > %462 = fmul float %27, %458 > %463 = fadd float %461, %462 > %464 = fadd float %463, %28 > %465 = fmul float %29, %456 > %466 = fmul float %30, %457 > %467 = fadd float %465, %466 > %468 = fmul float %31, %458 > %469 = fadd float %467, %468 > %470 = fadd float %469, %32 > %471 = fmul float %33, %456 > %472 = fmul float %34, %457 > %473 = fadd float %471, %472 > %474 = fmul float %35, %458 > %475 = fadd float %473, %474 > %476 = fadd float %475, %36 > %477 = fcmp oeq float %476, 0.000000e+00 > %478 = fcmp oeq float %476, 0.000000e+00 > %479 = fcmp ogt float %464, 0.000000e+00 > %480 = select i1 %479, float 1.000000e+00, float %464 > %481 = fcmp oge float %480, 0.000000e+00 > %482 = fcmp ogt float %470, 0.000000e+00 > %483 = select i1 %482, float 1.000000e+00, float %470 > %484 = fcmp oge float %483, 0.000000e+00 > %.op448 = fmul float %480, 0x4600000000000000 > %485 = select i1 %481, float %.op448, float 0xC600000000000000 > %.op449 = fmul float %483, 0x4600000000000000 > %486 = select i1 %484, float %.op449, float 0xC600000000000000 > %487 = fdiv float 1.000000e+00, %476 > %488 = fmul float %464, %487 > %489 = fmul float %470, %487 > %490 = select i1 %477, float %485, float %488 > %491 = select i1 %478, float %486, float %489 > %492 = call float @llvm.fma.f32(float %490, float 5.000000e-01, float 5.000000e-01) > %493 = call float @llvm.fma.f32(float %491, float -5.000000e-01, float 5.000000e-01) > %494 = bitcast float %492 to i32 > %495 = bitcast float %493 to i32 > %496 = insertelement <2 x i32> undef, i32 %494, i32 0 > %497 = insertelement <2 x i32> %496, i32 %495, i32 1 > %498 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %497, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %499 = extractelement <4 x float> %498, i32 0 > %500 = fsub float %458, %499 > %501 = fcmp olt float %500, 0.000000e+00 > %.411 = select i1 %501, float %381, float %380 > %502 = fmul float %.411, 5.000000e-01 > %503 = fcmp ogt float %500, 0.000000e+00 > %504 = and i1 %286, %503 > %temp84.0 = select i1 %504, float %502, float %.411 > %505 = call float @llvm.fabs.f32(float %500) > %506 = fsub float %505, %temp84.0 > %507 = fcmp une float %temp84.0, 0.000000e+00 > br i1 %507, label %IF203, label %ELSE204 > >IF203: ; preds = %ENDIF193 > %508 = fdiv float 1.000000e+00, %temp84.0 > %509 = fmul float %506, %508 > br label %ENDIF202 > >ELSE204: ; preds = %ENDIF193 > %510 = fcmp ogt float %506, 0.000000e+00 > %511 = select i1 %510, float 1.000000e+00, float %506 > %512 = fcmp oge float %511, 0.000000e+00 > %.op450 = fmul float %511, 0x4600000000000000 > %513 = select i1 %512, float %.op450, float 0xC600000000000000 > br label %ENDIF202 > >ENDIF202: ; preds = %ELSE204, %IF203 > %temp80.1 = phi float [ %509, %IF203 ], [ %513, %ELSE204 ] > 
%514 = call float @llvm.AMDGPU.clamp.(float %temp80.1, float 0.000000e+00, float 1.000000e+00) > %515 = fadd float %441, %514 > %516 = fmul float %196, 0x3FA99999A0000000 > %517 = fmul float %195, 0xBFD851EB80000000 > %518 = fadd float %516, %517 > %519 = fmul float %195, 0xBFA99999A0000000 > %520 = fmul float %196, 0xBFD851EB80000000 > %521 = fadd float %519, %520 > %522 = call float @llvm.fma.f32(float %207, float %521, float %161) > %523 = call float @llvm.fma.f32(float %209, float %521, float %162) > %524 = call float @llvm.fma.f32(float %211, float %521, float %163) > %525 = call float @llvm.fma.f32(float %179, float %518, float %522) > %526 = call float @llvm.fma.f32(float %180, float %518, float %523) > %527 = call float @llvm.fma.f32(float %181, float %518, float %524) > %528 = fmul float %229, 0x4010666660000000 > %529 = fmul float %229, 0x4000666660000000 > %530 = fmul float %229, 0x4015333340000000 > %531 = fmul float %229, 0x4005333340000000 > %532 = call float @llvm.fma.f32(float %525, float %528, float %237) > %533 = call float @llvm.fma.f32(float %526, float %528, float %238) > %534 = call float @llvm.fma.f32(float %527, float %528, float %226) > %535 = fmul float %25, %532 > %536 = fmul float %26, %533 > %537 = fadd float %535, %536 > %538 = fmul float %27, %534 > %539 = fadd float %537, %538 > %540 = fadd float %539, %28 > %541 = fmul float %29, %532 > %542 = fmul float %30, %533 > %543 = fadd float %541, %542 > %544 = fmul float %31, %534 > %545 = fadd float %543, %544 > %546 = fadd float %545, %32 > %547 = fmul float %33, %532 > %548 = fmul float %34, %533 > %549 = fadd float %547, %548 > %550 = fmul float %35, %534 > %551 = fadd float %549, %550 > %552 = fadd float %551, %36 > %553 = fcmp oeq float %552, 0.000000e+00 > %554 = fcmp oeq float %552, 0.000000e+00 > %555 = fcmp ogt float %540, 0.000000e+00 > %556 = select i1 %555, float 1.000000e+00, float %540 > %557 = fcmp oge float %556, 0.000000e+00 > %558 = fcmp ogt float %546, 0.000000e+00 > %559 = select i1 %558, float 1.000000e+00, float %546 > %560 = fcmp oge float %559, 0.000000e+00 > %.op451 = fmul float %556, 0x4600000000000000 > %561 = select i1 %557, float %.op451, float 0xC600000000000000 > %.op452 = fmul float %559, 0x4600000000000000 > %562 = select i1 %560, float %.op452, float 0xC600000000000000 > %563 = fdiv float 1.000000e+00, %552 > %564 = fmul float %540, %563 > %565 = fmul float %546, %563 > %566 = select i1 %553, float %561, float %564 > %567 = select i1 %554, float %562, float %565 > %568 = call float @llvm.fma.f32(float %566, float 5.000000e-01, float 5.000000e-01) > %569 = call float @llvm.fma.f32(float %567, float -5.000000e-01, float 5.000000e-01) > %570 = bitcast float %568 to i32 > %571 = bitcast float %569 to i32 > %572 = insertelement <2 x i32> undef, i32 %570, i32 0 > %573 = insertelement <2 x i32> %572, i32 %571, i32 1 > %574 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %573, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %575 = extractelement <4 x float> %574, i32 0 > %576 = fsub float %534, %575 > %577 = fcmp olt float %576, 0.000000e+00 > %.412 = select i1 %577, float %529, float %528 > %578 = fmul float %.412, 5.000000e-01 > %579 = fcmp ogt float %576, 0.000000e+00 > %580 = and i1 %286, %579 > %temp92.0 = select i1 %580, float %578, float %.412 > %581 = call float @llvm.fabs.f32(float %576) > %582 = fsub float %581, %temp92.0 > %583 = fcmp une float %temp92.0, 0.000000e+00 > br i1 %583, label %IF212, label %ELSE213 > >IF212: ; preds 
= %ENDIF202 > %584 = fdiv float 1.000000e+00, %temp92.0 > %585 = fmul float %582, %584 > br label %ENDIF211 > >ELSE213: ; preds = %ENDIF202 > %586 = fcmp ogt float %582, 0.000000e+00 > %587 = select i1 %586, float 1.000000e+00, float %582 > %588 = fcmp oge float %587, 0.000000e+00 > %.op453 = fmul float %587, 0x4600000000000000 > %589 = select i1 %588, float %.op453, float 0xC600000000000000 > br label %ENDIF211 > >ENDIF211: ; preds = %ELSE213, %IF212 > %temp88.1 = phi float [ %585, %IF212 ], [ %589, %ELSE213 ] > %590 = call float @llvm.AMDGPU.clamp.(float %temp88.1, float 0.000000e+00, float 1.000000e+00) > %591 = fadd float %515, %590 > %592 = fmul float %196, 5.000000e-01 > %593 = fmul float %195, 5.000000e-01 > %594 = fadd float %592, %593 > %595 = fmul float %195, -5.000000e-01 > %596 = fmul float %196, 5.000000e-01 > %597 = fadd float %595, %596 > %598 = call float @llvm.fma.f32(float %207, float %597, float %161) > %599 = call float @llvm.fma.f32(float %209, float %597, float %162) > %600 = call float @llvm.fma.f32(float %211, float %597, float %163) > %601 = call float @llvm.fma.f32(float %179, float %594, float %598) > %602 = call float @llvm.fma.f32(float %180, float %594, float %599) > %603 = call float @llvm.fma.f32(float %181, float %594, float %600) > %604 = call float @llvm.fma.f32(float %601, float %530, float %237) > %605 = call float @llvm.fma.f32(float %602, float %530, float %238) > %606 = call float @llvm.fma.f32(float %603, float %530, float %226) > %607 = fmul float %25, %604 > %608 = fmul float %26, %605 > %609 = fadd float %607, %608 > %610 = fmul float %27, %606 > %611 = fadd float %609, %610 > %612 = fadd float %611, %28 > %613 = fmul float %29, %604 > %614 = fmul float %30, %605 > %615 = fadd float %613, %614 > %616 = fmul float %31, %606 > %617 = fadd float %615, %616 > %618 = fadd float %617, %32 > %619 = fmul float %33, %604 > %620 = fmul float %34, %605 > %621 = fadd float %619, %620 > %622 = fmul float %35, %606 > %623 = fadd float %621, %622 > %624 = fadd float %623, %36 > %625 = fcmp oeq float %624, 0.000000e+00 > %626 = fcmp oeq float %624, 0.000000e+00 > %627 = fcmp ogt float %612, 0.000000e+00 > %628 = select i1 %627, float 1.000000e+00, float %612 > %629 = fcmp oge float %628, 0.000000e+00 > %630 = fcmp ogt float %618, 0.000000e+00 > %631 = select i1 %630, float 1.000000e+00, float %618 > %632 = fcmp oge float %631, 0.000000e+00 > %.op454 = fmul float %628, 0x4600000000000000 > %633 = select i1 %629, float %.op454, float 0xC600000000000000 > %.op455 = fmul float %631, 0x4600000000000000 > %634 = select i1 %632, float %.op455, float 0xC600000000000000 > %635 = fdiv float 1.000000e+00, %624 > %636 = fmul float %612, %635 > %637 = fmul float %618, %635 > %638 = select i1 %625, float %633, float %636 > %639 = select i1 %626, float %634, float %637 > %640 = call float @llvm.fma.f32(float %638, float 5.000000e-01, float 5.000000e-01) > %641 = call float @llvm.fma.f32(float %639, float -5.000000e-01, float 5.000000e-01) > %642 = bitcast float %640 to i32 > %643 = bitcast float %641 to i32 > %644 = insertelement <2 x i32> undef, i32 %642, i32 0 > %645 = insertelement <2 x i32> %644, i32 %643, i32 1 > %646 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %645, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %647 = extractelement <4 x float> %646, i32 0 > %648 = fsub float %606, %647 > %649 = fcmp olt float %648, 0.000000e+00 > %.413 = select i1 %649, float %531, float %530 > %650 = fmul float %.413, 5.000000e-01 
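One more recurring detail: after each depth fetch the code does not use the raw per-tap width directly. It picks between two precomputed widths on the sign of the depth difference, then halves the chosen width when %286 (the z component of one of the normalized direction vectors, %163, compared against zero once near the top of the shader) is true and the difference is positive; see %576 through %temp92.0 just above and the matching groups around %.409, %.410 and so on. A small C sketch of that selection, with neutral parameter names since the dump does not say what the two widths mean:

    /* Per-tap width selection, as in %576-%temp92.0: choose a width on the
     * sign of the depth difference, and halve it when the direction's z is
     * positive and the difference is positive (dir_z_positive is %286). */
    static float tap_width(float depth_diff, int dir_z_positive,
                           float width_if_pos, float width_if_neg)
    {
        float w = (depth_diff < 0.0f) ? width_if_neg : width_if_pos;
        if (dir_z_positive && depth_diff > 0.0f)
            w *= 0.5f;
        return w;
    }

The clamped quotient of fabs(depth_diff) minus this width over the width is what each ENDIF block's phi feeds into llvm.AMDGPU.clamp. and adds to the running sum.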
> %651 = fcmp ogt float %648, 0.000000e+00 > %652 = and i1 %286, %651 > %temp96.0 = select i1 %652, float %650, float %.413 > %653 = call float @llvm.fabs.f32(float %648) > %654 = fsub float %653, %temp96.0 > %655 = fcmp une float %temp96.0, 0.000000e+00 > br i1 %655, label %IF221, label %ELSE222 > >IF221: ; preds = %ENDIF211 > %656 = fdiv float 1.000000e+00, %temp96.0 > %657 = fmul float %654, %656 > br label %ENDIF220 > >ELSE222: ; preds = %ENDIF211 > %658 = fcmp ogt float %654, 0.000000e+00 > %659 = select i1 %658, float 1.000000e+00, float %654 > %660 = fcmp oge float %659, 0.000000e+00 > %.op456 = fmul float %659, 0x4600000000000000 > %661 = select i1 %660, float %.op456, float 0xC600000000000000 > br label %ENDIF220 > >ENDIF220: ; preds = %ELSE222, %IF221 > %temp92.2 = phi float [ %657, %IF221 ], [ %661, %ELSE222 ] > %662 = call float @llvm.AMDGPU.clamp.(float %temp92.2, float 0.000000e+00, float 1.000000e+00) > %663 = fadd float %591, %662 > %664 = fmul float %229, 0x4018666660000000 > %665 = fmul float %229, 0x4008666660000000 > %666 = fmul float %229, 0x401ECCCCC0000000 > %667 = fmul float %229, 0x400ECCCCC0000000 > %668 = fmul float %196, 0xBFD99999A0000000 > %669 = fmul float %195, 0x3FD6666660000000 > %670 = fadd float %668, %669 > %671 = fmul float %195, 0x3FD99999A0000000 > %672 = fmul float %196, 0x3FD6666660000000 > %673 = fadd float %671, %672 > %674 = call float @llvm.fma.f32(float %207, float %673, float %161) > %675 = call float @llvm.fma.f32(float %209, float %673, float %162) > %676 = call float @llvm.fma.f32(float %211, float %673, float %163) > %677 = call float @llvm.fma.f32(float %179, float %670, float %674) > %678 = call float @llvm.fma.f32(float %180, float %670, float %675) > %679 = call float @llvm.fma.f32(float %181, float %670, float %676) > %680 = call float @llvm.fma.f32(float %677, float %664, float %237) > %681 = call float @llvm.fma.f32(float %678, float %664, float %238) > %682 = call float @llvm.fma.f32(float %679, float %664, float %226) > %683 = fmul float %25, %680 > %684 = fmul float %26, %681 > %685 = fadd float %683, %684 > %686 = fmul float %27, %682 > %687 = fadd float %685, %686 > %688 = fadd float %687, %28 > %689 = fmul float %29, %680 > %690 = fmul float %30, %681 > %691 = fadd float %689, %690 > %692 = fmul float %31, %682 > %693 = fadd float %691, %692 > %694 = fadd float %693, %32 > %695 = fmul float %33, %680 > %696 = fmul float %34, %681 > %697 = fadd float %695, %696 > %698 = fmul float %35, %682 > %699 = fadd float %697, %698 > %700 = fadd float %699, %36 > %701 = fcmp oeq float %700, 0.000000e+00 > %702 = fcmp oeq float %700, 0.000000e+00 > %703 = fcmp ogt float %688, 0.000000e+00 > %704 = select i1 %703, float 1.000000e+00, float %688 > %705 = fcmp oge float %704, 0.000000e+00 > %706 = fcmp ogt float %694, 0.000000e+00 > %707 = select i1 %706, float 1.000000e+00, float %694 > %708 = fcmp oge float %707, 0.000000e+00 > %.op457 = fmul float %704, 0x4600000000000000 > %709 = select i1 %705, float %.op457, float 0xC600000000000000 > %.op458 = fmul float %707, 0x4600000000000000 > %710 = select i1 %708, float %.op458, float 0xC600000000000000 > %711 = fdiv float 1.000000e+00, %700 > %712 = fmul float %688, %711 > %713 = fmul float %694, %711 > %714 = select i1 %701, float %709, float %712 > %715 = select i1 %702, float %710, float %713 > %716 = call float @llvm.fma.f32(float %714, float 5.000000e-01, float 5.000000e-01) > %717 = call float @llvm.fma.f32(float %715, float -5.000000e-01, float 5.000000e-01) > %718 = bitcast float %716 
to i32 > %719 = bitcast float %717 to i32 > %720 = insertelement <2 x i32> undef, i32 %718, i32 0 > %721 = insertelement <2 x i32> %720, i32 %719, i32 1 > %722 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %721, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %723 = extractelement <4 x float> %722, i32 0 > %724 = fsub float %682, %723 > %725 = fcmp olt float %724, 0.000000e+00 > %.414 = select i1 %725, float %665, float %664 > %726 = fmul float %.414, 5.000000e-01 > %727 = fcmp ogt float %724, 0.000000e+00 > %728 = and i1 %286, %727 > %temp108.0 = select i1 %728, float %726, float %.414 > %729 = call float @llvm.fabs.f32(float %724) > %730 = fsub float %729, %temp108.0 > %731 = fcmp une float %temp108.0, 0.000000e+00 > br i1 %731, label %IF230, label %ELSE231 > >IF230: ; preds = %ENDIF220 > %732 = fdiv float 1.000000e+00, %temp108.0 > %733 = fmul float %730, %732 > br label %ENDIF229 > >ELSE231: ; preds = %ENDIF220 > %734 = fcmp ogt float %730, 0.000000e+00 > %735 = select i1 %734, float 1.000000e+00, float %730 > %736 = fcmp oge float %735, 0.000000e+00 > %.op459 = fmul float %735, 0x4600000000000000 > %737 = select i1 %736, float %.op459, float 0xC600000000000000 > br label %ENDIF229 > >ENDIF229: ; preds = %ELSE231, %IF230 > %temp108.1 = phi float [ %733, %IF230 ], [ %737, %ELSE231 ] > %738 = call float @llvm.AMDGPU.clamp.(float %temp108.1, float 0.000000e+00, float 1.000000e+00) > %739 = fadd float %663, %738 > %740 = fmul float %196, 0xBFD851EB80000000 > %741 = fmul float %195, -5.000000e-01 > %742 = fadd float %740, %741 > %743 = fmul float %195, 0x3FD851EB80000000 > %744 = fmul float %196, -5.000000e-01 > %745 = fadd float %743, %744 > %746 = call float @llvm.fma.f32(float %207, float %745, float %161) > %747 = call float @llvm.fma.f32(float %209, float %745, float %162) > %748 = call float @llvm.fma.f32(float %211, float %745, float %163) > %749 = call float @llvm.fma.f32(float %179, float %742, float %746) > %750 = call float @llvm.fma.f32(float %180, float %742, float %747) > %751 = call float @llvm.fma.f32(float %181, float %742, float %748) > %752 = call float @llvm.fma.f32(float %749, float %666, float %237) > %753 = call float @llvm.fma.f32(float %750, float %666, float %238) > %754 = call float @llvm.fma.f32(float %751, float %666, float %226) > %755 = fmul float %25, %752 > %756 = fmul float %26, %753 > %757 = fadd float %755, %756 > %758 = fmul float %27, %754 > %759 = fadd float %757, %758 > %760 = fadd float %759, %28 > %761 = fmul float %29, %752 > %762 = fmul float %30, %753 > %763 = fadd float %761, %762 > %764 = fmul float %31, %754 > %765 = fadd float %763, %764 > %766 = fadd float %765, %32 > %767 = fmul float %33, %752 > %768 = fmul float %34, %753 > %769 = fadd float %767, %768 > %770 = fmul float %35, %754 > %771 = fadd float %769, %770 > %772 = fadd float %771, %36 > %773 = fcmp oeq float %772, 0.000000e+00 > %774 = fcmp oeq float %772, 0.000000e+00 > %775 = fcmp ogt float %760, 0.000000e+00 > %776 = select i1 %775, float 1.000000e+00, float %760 > %777 = fcmp oge float %776, 0.000000e+00 > %778 = fcmp ogt float %766, 0.000000e+00 > %779 = select i1 %778, float 1.000000e+00, float %766 > %780 = fcmp oge float %779, 0.000000e+00 > %.op460 = fmul float %776, 0x4600000000000000 > %781 = select i1 %777, float %.op460, float 0xC600000000000000 > %.op461 = fmul float %779, 0x4600000000000000 > %782 = select i1 %780, float %.op461, float 0xC600000000000000 > %783 = fdiv float 1.000000e+00, %772 > %784 = fmul float 
%760, %783 > %785 = fmul float %766, %783 > %786 = select i1 %773, float %781, float %784 > %787 = select i1 %774, float %782, float %785 > %788 = call float @llvm.fma.f32(float %786, float 5.000000e-01, float 5.000000e-01) > %789 = call float @llvm.fma.f32(float %787, float -5.000000e-01, float 5.000000e-01) > %790 = bitcast float %788 to i32 > %791 = bitcast float %789 to i32 > %792 = insertelement <2 x i32> undef, i32 %790, i32 0 > %793 = insertelement <2 x i32> %792, i32 %791, i32 1 > %794 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %793, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %795 = extractelement <4 x float> %794, i32 0 > %796 = fsub float %754, %795 > %797 = fcmp olt float %796, 0.000000e+00 > %.415 = select i1 %797, float %667, float %666 > %798 = fmul float %.415, 5.000000e-01 > %799 = fcmp ogt float %796, 0.000000e+00 > %800 = and i1 %286, %799 > %temp112.0 = select i1 %800, float %798, float %.415 > %801 = call float @llvm.fabs.f32(float %796) > %802 = fsub float %801, %temp112.0 > %803 = fcmp une float %temp112.0, 0.000000e+00 > br i1 %803, label %IF239, label %ELSE240 > >IF239: ; preds = %ENDIF229 > %804 = fdiv float 1.000000e+00, %temp112.0 > %805 = fmul float %802, %804 > br label %ENDIF238 > >ELSE240: ; preds = %ENDIF229 > %806 = fcmp ogt float %802, 0.000000e+00 > %807 = select i1 %806, float 1.000000e+00, float %802 > %808 = fcmp oge float %807, 0.000000e+00 > %.op462 = fmul float %807, 0x4600000000000000 > %809 = select i1 %808, float %.op462, float 0xC600000000000000 > br label %ENDIF238 > >ENDIF238: ; preds = %ELSE240, %IF239 > %temp112.1 = phi float [ %805, %IF239 ], [ %809, %ELSE240 ] > %810 = call float @llvm.AMDGPU.clamp.(float %temp112.1, float 0.000000e+00, float 1.000000e+00) > %811 = fadd float %739, %810 > %812 = fmul float %229, 8.500000e+00 > %813 = fmul float %229, 4.250000e+00 > %814 = fmul float %229, 1.000000e+01 > %815 = fmul float %229, 5.000000e+00 > %816 = fmul float %196, 0x3FE6B851E0000000 > %817 = fmul float %195, 0x3FD5C28F60000000 > %818 = fadd float %816, %817 > %819 = fmul float %195, 0xBFE6B851E0000000 > %820 = fmul float %196, 0x3FD5C28F60000000 > %821 = fadd float %819, %820 > %822 = call float @llvm.fma.f32(float %207, float %821, float %161) > %823 = call float @llvm.fma.f32(float %209, float %821, float %162) > %824 = call float @llvm.fma.f32(float %211, float %821, float %163) > %825 = call float @llvm.fma.f32(float %179, float %818, float %822) > %826 = call float @llvm.fma.f32(float %180, float %818, float %823) > %827 = call float @llvm.fma.f32(float %181, float %818, float %824) > %828 = call float @llvm.fma.f32(float %825, float %812, float %237) > %829 = call float @llvm.fma.f32(float %826, float %812, float %238) > %830 = call float @llvm.fma.f32(float %827, float %812, float %226) > %831 = fmul float %25, %828 > %832 = fmul float %26, %829 > %833 = fadd float %831, %832 > %834 = fmul float %27, %830 > %835 = fadd float %833, %834 > %836 = fadd float %835, %28 > %837 = fmul float %29, %828 > %838 = fmul float %30, %829 > %839 = fadd float %837, %838 > %840 = fmul float %31, %830 > %841 = fadd float %839, %840 > %842 = fadd float %841, %32 > %843 = fmul float %33, %828 > %844 = fmul float %34, %829 > %845 = fadd float %843, %844 > %846 = fmul float %35, %830 > %847 = fadd float %845, %846 > %848 = fadd float %847, %36 > %849 = fcmp oeq float %848, 0.000000e+00 > %850 = fcmp oeq float %848, 0.000000e+00 > %851 = fcmp ogt float %836, 0.000000e+00 > %852 = select i1 %851, 
float 1.000000e+00, float %836 > %853 = fcmp oge float %852, 0.000000e+00 > %854 = fcmp ogt float %842, 0.000000e+00 > %855 = select i1 %854, float 1.000000e+00, float %842 > %856 = fcmp oge float %855, 0.000000e+00 > %.op463 = fmul float %852, 0x4600000000000000 > %857 = select i1 %853, float %.op463, float 0xC600000000000000 > %.op464 = fmul float %855, 0x4600000000000000 > %858 = select i1 %856, float %.op464, float 0xC600000000000000 > %859 = fdiv float 1.000000e+00, %848 > %860 = fmul float %836, %859 > %861 = fmul float %842, %859 > %862 = select i1 %849, float %857, float %860 > %863 = select i1 %850, float %858, float %861 > %864 = call float @llvm.fma.f32(float %862, float 5.000000e-01, float 5.000000e-01) > %865 = call float @llvm.fma.f32(float %863, float -5.000000e-01, float 5.000000e-01) > %866 = bitcast float %864 to i32 > %867 = bitcast float %865 to i32 > %868 = insertelement <2 x i32> undef, i32 %866, i32 0 > %869 = insertelement <2 x i32> %868, i32 %867, i32 1 > %870 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %869, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %871 = extractelement <4 x float> %870, i32 0 > %872 = fsub float %830, %871 > %873 = fcmp olt float %872, 0.000000e+00 > %.416 = select i1 %873, float %813, float %812 > %874 = fmul float %.416, 5.000000e-01 > %875 = fcmp ogt float %872, 0.000000e+00 > %876 = and i1 %286, %875 > %temp128.0 = select i1 %876, float %874, float %.416 > %877 = call float @llvm.fabs.f32(float %872) > %878 = fsub float %877, %temp128.0 > %879 = fcmp une float %temp128.0, 0.000000e+00 > br i1 %879, label %IF248, label %ELSE249 > >IF248: ; preds = %ENDIF238 > %880 = fdiv float 1.000000e+00, %temp128.0 > %881 = fmul float %878, %880 > br label %ENDIF247 > >ELSE249: ; preds = %ENDIF238 > %882 = fcmp ogt float %878, 0.000000e+00 > %883 = select i1 %882, float 1.000000e+00, float %878 > %884 = fcmp oge float %883, 0.000000e+00 > %.op465 = fmul float %883, 0x4600000000000000 > %885 = select i1 %884, float %.op465, float 0xC600000000000000 > br label %ENDIF247 > >ENDIF247: ; preds = %ELSE249, %IF248 > %temp124.1 = phi float [ %881, %IF248 ], [ %885, %ELSE249 ] > %886 = call float @llvm.AMDGPU.clamp.(float %temp124.1, float 0.000000e+00, float 1.000000e+00) > %887 = fadd float %811, %886 > %888 = fmul float %196, 0x3FB99999A0000000 > %889 = fmul float %195, 0xBFC3333340000000 > %890 = fadd float %888, %889 > %891 = fmul float %195, 0xBFB99999A0000000 > %892 = fmul float %196, 0xBFC3333340000000 > %893 = fadd float %891, %892 > %894 = call float @llvm.fma.f32(float %207, float %893, float %161) > %895 = call float @llvm.fma.f32(float %209, float %893, float %162) > %896 = call float @llvm.fma.f32(float %211, float %893, float %163) > %897 = call float @llvm.fma.f32(float %179, float %890, float %894) > %898 = call float @llvm.fma.f32(float %180, float %890, float %895) > %899 = call float @llvm.fma.f32(float %181, float %890, float %896) > %900 = call float @llvm.fma.f32(float %897, float %814, float %237) > %901 = call float @llvm.fma.f32(float %898, float %814, float %238) > %902 = call float @llvm.fma.f32(float %899, float %814, float %226) > %903 = fmul float %25, %900 > %904 = fmul float %26, %901 > %905 = fadd float %903, %904 > %906 = fmul float %27, %902 > %907 = fadd float %905, %906 > %908 = fadd float %907, %28 > %909 = fmul float %29, %900 > %910 = fmul float %30, %901 > %911 = fadd float %909, %910 > %912 = fmul float %31, %902 > %913 = fadd float %911, %912 > %914 = fadd float 
%913, %32 > %915 = fmul float %33, %900 > %916 = fmul float %34, %901 > %917 = fadd float %915, %916 > %918 = fmul float %35, %902 > %919 = fadd float %917, %918 > %920 = fadd float %919, %36 > %921 = fcmp oeq float %920, 0.000000e+00 > %922 = fcmp oeq float %920, 0.000000e+00 > %923 = fcmp ogt float %908, 0.000000e+00 > %924 = select i1 %923, float 1.000000e+00, float %908 > %925 = fcmp oge float %924, 0.000000e+00 > %926 = fcmp ogt float %914, 0.000000e+00 > %927 = select i1 %926, float 1.000000e+00, float %914 > %928 = fcmp oge float %927, 0.000000e+00 > %.op466 = fmul float %924, 0x4600000000000000 > %929 = select i1 %925, float %.op466, float 0xC600000000000000 > %.op467 = fmul float %927, 0x4600000000000000 > %930 = select i1 %928, float %.op467, float 0xC600000000000000 > %931 = fdiv float 1.000000e+00, %920 > %932 = fmul float %908, %931 > %933 = fmul float %914, %931 > %934 = select i1 %921, float %929, float %932 > %935 = select i1 %922, float %930, float %933 > %936 = call float @llvm.fma.f32(float %934, float 5.000000e-01, float 5.000000e-01) > %937 = call float @llvm.fma.f32(float %935, float -5.000000e-01, float 5.000000e-01) > %938 = bitcast float %936 to i32 > %939 = bitcast float %937 to i32 > %940 = insertelement <2 x i32> undef, i32 %938, i32 0 > %941 = insertelement <2 x i32> %940, i32 %939, i32 1 > %942 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %941, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %943 = extractelement <4 x float> %942, i32 0 > %944 = fsub float %902, %943 > %945 = fcmp olt float %944, 0.000000e+00 > %.417 = select i1 %945, float %815, float %814 > %946 = fmul float %.417, 5.000000e-01 > %947 = fcmp ogt float %944, 0.000000e+00 > %948 = and i1 %286, %947 > %temp132.0 = select i1 %948, float %946, float %.417 > %949 = call float @llvm.fabs.f32(float %944) > %950 = fsub float %949, %temp132.0 > %951 = fcmp une float %temp132.0, 0.000000e+00 > br i1 %951, label %IF257, label %ELSE258 > >IF257: ; preds = %ENDIF247 > %952 = fdiv float 1.000000e+00, %temp132.0 > %953 = fmul float %950, %952 > br label %ENDIF256 > >ELSE258: ; preds = %ENDIF247 > %954 = fcmp ogt float %950, 0.000000e+00 > %955 = select i1 %954, float 1.000000e+00, float %950 > %956 = fcmp oge float %955, 0.000000e+00 > %.op468 = fmul float %955, 0x4600000000000000 > %957 = select i1 %956, float %.op468, float 0xC600000000000000 > br label %ENDIF256 > >ENDIF256: ; preds = %ELSE258, %IF257 > %temp128.2 = phi float [ %953, %IF257 ], [ %957, %ELSE258 ] > %958 = call float @llvm.AMDGPU.clamp.(float %temp128.2, float 0.000000e+00, float 1.000000e+00) > %959 = fadd float %887, %958 > %960 = fmul float %196, 0xBFB99999A0000000 > %961 = fmul float %195, 0xBFB99999A0000000 > %962 = fadd float %960, %961 > %963 = fmul float %195, 0x3FB99999A0000000 > %964 = fmul float %196, 0xBFB99999A0000000 > %965 = fadd float %963, %964 > %966 = call float @llvm.fma.f32(float %207, float %965, float %161) > %967 = call float @llvm.fma.f32(float %209, float %965, float %162) > %968 = call float @llvm.fma.f32(float %211, float %965, float %163) > %969 = call float @llvm.fma.f32(float %179, float %962, float %966) > %970 = call float @llvm.fma.f32(float %180, float %962, float %967) > %971 = call float @llvm.fma.f32(float %181, float %962, float %968) > %972 = call float @llvm.fma.f32(float %969, float %231, float %237) > %973 = call float @llvm.fma.f32(float %970, float %231, float %238) > %974 = call float @llvm.fma.f32(float %971, float %231, float %226) > %975 = 
fmul float %25, %972 > %976 = fmul float %26, %973 > %977 = fadd float %975, %976 > %978 = fmul float %27, %974 > %979 = fadd float %977, %978 > %980 = fadd float %979, %28 > %981 = fmul float %29, %972 > %982 = fmul float %30, %973 > %983 = fadd float %981, %982 > %984 = fmul float %31, %974 > %985 = fadd float %983, %984 > %986 = fadd float %985, %32 > %987 = fmul float %33, %972 > %988 = fmul float %34, %973 > %989 = fadd float %987, %988 > %990 = fmul float %35, %974 > %991 = fadd float %989, %990 > %992 = fadd float %991, %36 > %993 = fcmp oeq float %992, 0.000000e+00 > %994 = fcmp oeq float %992, 0.000000e+00 > %995 = fcmp ogt float %980, 0.000000e+00 > %996 = select i1 %995, float 1.000000e+00, float %980 > %997 = fcmp oge float %996, 0.000000e+00 > %998 = fcmp ogt float %986, 0.000000e+00 > %999 = select i1 %998, float 1.000000e+00, float %986 > %1000 = fcmp oge float %999, 0.000000e+00 > %.op469 = fmul float %996, 0x4600000000000000 > %1001 = select i1 %997, float %.op469, float 0xC600000000000000 > %.op470 = fmul float %999, 0x4600000000000000 > %1002 = select i1 %1000, float %.op470, float 0xC600000000000000 > %1003 = fdiv float 1.000000e+00, %992 > %1004 = fmul float %980, %1003 > %1005 = fmul float %986, %1003 > %1006 = select i1 %993, float %1001, float %1004 > %1007 = select i1 %994, float %1002, float %1005 > %1008 = call float @llvm.fma.f32(float %1006, float 5.000000e-01, float 5.000000e-01) > %1009 = call float @llvm.fma.f32(float %1007, float -5.000000e-01, float 5.000000e-01) > %1010 = bitcast float %1008 to i32 > %1011 = bitcast float %1009 to i32 > %1012 = insertelement <2 x i32> undef, i32 %1010, i32 0 > %1013 = insertelement <2 x i32> %1012, i32 %1011, i32 1 > %1014 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1013, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1015 = extractelement <4 x float> %1014, i32 0 > %1016 = fsub float %974, %1015 > %1017 = fcmp olt float %1016, 0.000000e+00 > %.418 = select i1 %1017, float %232, float %231 > %1018 = fmul float %.418, 5.000000e-01 > %1019 = fcmp ogt float %1016, 0.000000e+00 > %1020 = and i1 %286, %1019 > %temp140.0 = select i1 %1020, float %1018, float %.418 > %1021 = call float @llvm.fabs.f32(float %1016) > %1022 = fsub float %1021, %temp140.0 > %1023 = fcmp une float %temp140.0, 0.000000e+00 > br i1 %1023, label %IF266, label %ELSE267 > >IF266: ; preds = %ENDIF256 > %1024 = fdiv float 1.000000e+00, %temp140.0 > %1025 = fmul float %1022, %1024 > br label %ENDIF265 > >ELSE267: ; preds = %ENDIF256 > %1026 = fcmp ogt float %1022, 0.000000e+00 > %1027 = select i1 %1026, float 1.000000e+00, float %1022 > %1028 = fcmp oge float %1027, 0.000000e+00 > %.op471 = fmul float %1027, 0x4600000000000000 > %1029 = select i1 %1028, float %.op471, float 0xC600000000000000 > br label %ENDIF265 > >ENDIF265: ; preds = %ELSE267, %IF266 > %temp136.1 = phi float [ %1025, %IF266 ], [ %1029, %ELSE267 ] > %1030 = call float @llvm.AMDGPU.clamp.(float %temp136.1, float 0.000000e+00, float 1.000000e+00) > %1031 = fadd float %959, %1030 > %1032 = call float @llvm.fma.f32(float %207, float %302, float %161) > %1033 = call float @llvm.fma.f32(float %209, float %302, float %162) > %1034 = call float @llvm.fma.f32(float %211, float %302, float %163) > %1035 = call float @llvm.fma.f32(float %179, float %301, float %1032) > %1036 = call float @llvm.fma.f32(float %180, float %301, float %1033) > %1037 = call float @llvm.fma.f32(float %181, float %301, float %1034) > %1038 = call float 
@llvm.fma.f32(float %1035, float %233, float %237) > %1039 = call float @llvm.fma.f32(float %1036, float %233, float %238) > %1040 = call float @llvm.fma.f32(float %1037, float %233, float %226) > %1041 = fmul float %25, %1038 > %1042 = fmul float %26, %1039 > %1043 = fadd float %1041, %1042 > %1044 = fmul float %27, %1040 > %1045 = fadd float %1043, %1044 > %1046 = fadd float %1045, %28 > %1047 = fmul float %29, %1038 > %1048 = fmul float %30, %1039 > %1049 = fadd float %1047, %1048 > %1050 = fmul float %31, %1040 > %1051 = fadd float %1049, %1050 > %1052 = fadd float %1051, %32 > %1053 = fmul float %33, %1038 > %1054 = fmul float %34, %1039 > %1055 = fadd float %1053, %1054 > %1056 = fmul float %35, %1040 > %1057 = fadd float %1055, %1056 > %1058 = fadd float %1057, %36 > %1059 = fcmp oeq float %1058, 0.000000e+00 > %1060 = fcmp oeq float %1058, 0.000000e+00 > %1061 = fcmp ogt float %1046, 0.000000e+00 > %1062 = select i1 %1061, float 1.000000e+00, float %1046 > %1063 = fcmp oge float %1062, 0.000000e+00 > %1064 = fcmp ogt float %1052, 0.000000e+00 > %1065 = select i1 %1064, float 1.000000e+00, float %1052 > %1066 = fcmp oge float %1065, 0.000000e+00 > %.op472 = fmul float %1062, 0x4600000000000000 > %1067 = select i1 %1063, float %.op472, float 0xC600000000000000 > %.op473 = fmul float %1065, 0x4600000000000000 > %1068 = select i1 %1066, float %.op473, float 0xC600000000000000 > %1069 = fdiv float 1.000000e+00, %1058 > %1070 = fmul float %1046, %1069 > %1071 = fmul float %1052, %1069 > %1072 = select i1 %1059, float %1067, float %1070 > %1073 = select i1 %1060, float %1068, float %1071 > %1074 = call float @llvm.fma.f32(float %1072, float 5.000000e-01, float 5.000000e-01) > %1075 = call float @llvm.fma.f32(float %1073, float -5.000000e-01, float 5.000000e-01) > %1076 = bitcast float %1074 to i32 > %1077 = bitcast float %1075 to i32 > %1078 = insertelement <2 x i32> undef, i32 %1076, i32 0 > %1079 = insertelement <2 x i32> %1078, i32 %1077, i32 1 > %1080 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1079, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1081 = extractelement <4 x float> %1080, i32 0 > %1082 = fsub float %1040, %1081 > %1083 = fcmp olt float %1082, 0.000000e+00 > %.419 = select i1 %1083, float %234, float %233 > %1084 = fmul float %.419, 5.000000e-01 > %1085 = fcmp ogt float %1082, 0.000000e+00 > %1086 = and i1 %286, %1085 > %temp136.2 = select i1 %1086, float %1084, float %.419 > %1087 = call float @llvm.fabs.f32(float %1082) > %1088 = fsub float %1087, %temp136.2 > %1089 = fcmp une float %temp136.2, 0.000000e+00 > br i1 %1089, label %IF275, label %ELSE276 > >IF275: ; preds = %ENDIF265 > %1090 = fdiv float 1.000000e+00, %temp136.2 > %1091 = fmul float %1088, %1090 > br label %ENDIF274 > >ELSE276: ; preds = %ENDIF265 > %1092 = fcmp ogt float %1088, 0.000000e+00 > %1093 = select i1 %1092, float 1.000000e+00, float %1088 > %1094 = fcmp oge float %1093, 0.000000e+00 > %.op474 = fmul float %1093, 0x4600000000000000 > %1095 = select i1 %1094, float %.op474, float 0xC600000000000000 > br label %ENDIF274 > >ENDIF274: ; preds = %ELSE276, %IF275 > %temp132.2 = phi float [ %1091, %IF275 ], [ %1095, %ELSE276 ] > %1096 = call float @llvm.AMDGPU.clamp.(float %temp132.2, float 0.000000e+00, float 1.000000e+00) > %1097 = fadd float %1031, %1096 > %1098 = fmul float %196, 0x3FD147AE20000000 > %1099 = fmul float %195, 0xBF847AE140000000 > %1100 = fadd float %1098, %1099 > %1101 = fmul float %195, 0xBFD147AE20000000 > %1102 = fmul float 
%196, 0xBF847AE140000000 > %1103 = fadd float %1101, %1102 > %1104 = call float @llvm.fma.f32(float %207, float %1103, float %161) > %1105 = call float @llvm.fma.f32(float %209, float %1103, float %162) > %1106 = call float @llvm.fma.f32(float %211, float %1103, float %163) > %1107 = call float @llvm.fma.f32(float %179, float %1100, float %1104) > %1108 = call float @llvm.fma.f32(float %180, float %1100, float %1105) > %1109 = call float @llvm.fma.f32(float %181, float %1100, float %1106) > %1110 = call float @llvm.fma.f32(float %1107, float %378, float %237) > %1111 = call float @llvm.fma.f32(float %1108, float %378, float %238) > %1112 = call float @llvm.fma.f32(float %1109, float %378, float %226) > %1113 = fmul float %25, %1110 > %1114 = fmul float %26, %1111 > %1115 = fadd float %1113, %1114 > %1116 = fmul float %27, %1112 > %1117 = fadd float %1115, %1116 > %1118 = fadd float %1117, %28 > %1119 = fmul float %29, %1110 > %1120 = fmul float %30, %1111 > %1121 = fadd float %1119, %1120 > %1122 = fmul float %31, %1112 > %1123 = fadd float %1121, %1122 > %1124 = fadd float %1123, %32 > %1125 = fmul float %33, %1110 > %1126 = fmul float %34, %1111 > %1127 = fadd float %1125, %1126 > %1128 = fmul float %35, %1112 > %1129 = fadd float %1127, %1128 > %1130 = fadd float %1129, %36 > %1131 = fcmp oeq float %1130, 0.000000e+00 > %1132 = fcmp oeq float %1130, 0.000000e+00 > %1133 = fcmp ogt float %1118, 0.000000e+00 > %1134 = select i1 %1133, float 1.000000e+00, float %1118 > %1135 = fcmp oge float %1134, 0.000000e+00 > %1136 = fcmp ogt float %1124, 0.000000e+00 > %1137 = select i1 %1136, float 1.000000e+00, float %1124 > %1138 = fcmp oge float %1137, 0.000000e+00 > %.op475 = fmul float %1134, 0x4600000000000000 > %1139 = select i1 %1135, float %.op475, float 0xC600000000000000 > %.op476 = fmul float %1137, 0x4600000000000000 > %1140 = select i1 %1138, float %.op476, float 0xC600000000000000 > %1141 = fdiv float 1.000000e+00, %1130 > %1142 = fmul float %1118, %1141 > %1143 = fmul float %1124, %1141 > %1144 = select i1 %1131, float %1139, float %1142 > %1145 = select i1 %1132, float %1140, float %1143 > %1146 = call float @llvm.fma.f32(float %1144, float 5.000000e-01, float 5.000000e-01) > %1147 = call float @llvm.fma.f32(float %1145, float -5.000000e-01, float 5.000000e-01) > %1148 = bitcast float %1146 to i32 > %1149 = bitcast float %1147 to i32 > %1150 = insertelement <2 x i32> undef, i32 %1148, i32 0 > %1151 = insertelement <2 x i32> %1150, i32 %1149, i32 1 > %1152 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1151, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1153 = extractelement <4 x float> %1152, i32 0 > %1154 = fsub float %1112, %1153 > %1155 = fcmp olt float %1154, 0.000000e+00 > %.420 = select i1 %1155, float %379, float %378 > %1156 = fmul float %.420, 5.000000e-01 > %1157 = fcmp ogt float %1154, 0.000000e+00 > %1158 = and i1 %286, %1157 > %temp144.0 = select i1 %1158, float %1156, float %.420 > %1159 = call float @llvm.fabs.f32(float %1154) > %1160 = fsub float %1159, %temp144.0 > %1161 = fcmp une float %temp144.0, 0.000000e+00 > br i1 %1161, label %IF284, label %ELSE285 > >IF284: ; preds = %ENDIF274 > %1162 = fdiv float 1.000000e+00, %temp144.0 > %1163 = fmul float %1160, %1162 > br label %ENDIF283 > >ELSE285: ; preds = %ENDIF274 > %1164 = fcmp ogt float %1160, 0.000000e+00 > %1165 = select i1 %1164, float 1.000000e+00, float %1160 > %1166 = fcmp oge float %1165, 0.000000e+00 > %.op477 = fmul float %1165, 0x4600000000000000 > 
%1167 = select i1 %1166, float %.op477, float 0xC600000000000000 > br label %ENDIF283 > >ENDIF283: ; preds = %ELSE285, %IF284 > %temp140.2 = phi float [ %1163, %IF284 ], [ %1167, %ELSE285 ] > %1168 = call float @llvm.AMDGPU.clamp.(float %temp140.2, float 0.000000e+00, float 1.000000e+00) > %1169 = fadd float %1097, %1168 > %1170 = fmul float %196, 0x3F847AE140000000 > %1171 = fmul float %195, 0x3FD3D70A40000000 > %1172 = fadd float %1170, %1171 > %1173 = fmul float %195, 0xBF847AE140000000 > %1174 = fmul float %196, 0x3FD3D70A40000000 > %1175 = fadd float %1173, %1174 > %1176 = call float @llvm.fma.f32(float %207, float %1175, float %161) > %1177 = call float @llvm.fma.f32(float %209, float %1175, float %162) > %1178 = call float @llvm.fma.f32(float %211, float %1175, float %163) > %1179 = call float @llvm.fma.f32(float %179, float %1172, float %1176) > %1180 = call float @llvm.fma.f32(float %180, float %1172, float %1177) > %1181 = call float @llvm.fma.f32(float %181, float %1172, float %1178) > %1182 = call float @llvm.fma.f32(float %1179, float %380, float %237) > %1183 = call float @llvm.fma.f32(float %1180, float %380, float %238) > %1184 = call float @llvm.fma.f32(float %1181, float %380, float %226) > %1185 = fmul float %25, %1182 > %1186 = fmul float %26, %1183 > %1187 = fadd float %1185, %1186 > %1188 = fmul float %27, %1184 > %1189 = fadd float %1187, %1188 > %1190 = fadd float %1189, %28 > %1191 = fmul float %29, %1182 > %1192 = fmul float %30, %1183 > %1193 = fadd float %1191, %1192 > %1194 = fmul float %31, %1184 > %1195 = fadd float %1193, %1194 > %1196 = fadd float %1195, %32 > %1197 = fmul float %33, %1182 > %1198 = fmul float %34, %1183 > %1199 = fadd float %1197, %1198 > %1200 = fmul float %35, %1184 > %1201 = fadd float %1199, %1200 > %1202 = fadd float %1201, %36 > %1203 = fcmp oeq float %1202, 0.000000e+00 > %1204 = fcmp oeq float %1202, 0.000000e+00 > %1205 = fcmp ogt float %1190, 0.000000e+00 > %1206 = select i1 %1205, float 1.000000e+00, float %1190 > %1207 = fcmp oge float %1206, 0.000000e+00 > %1208 = fcmp ogt float %1196, 0.000000e+00 > %1209 = select i1 %1208, float 1.000000e+00, float %1196 > %1210 = fcmp oge float %1209, 0.000000e+00 > %.op478 = fmul float %1206, 0x4600000000000000 > %1211 = select i1 %1207, float %.op478, float 0xC600000000000000 > %.op479 = fmul float %1209, 0x4600000000000000 > %1212 = select i1 %1210, float %.op479, float 0xC600000000000000 > %1213 = fdiv float 1.000000e+00, %1202 > %1214 = fmul float %1190, %1213 > %1215 = fmul float %1196, %1213 > %1216 = select i1 %1203, float %1211, float %1214 > %1217 = select i1 %1204, float %1212, float %1215 > %1218 = call float @llvm.fma.f32(float %1216, float 5.000000e-01, float 5.000000e-01) > %1219 = call float @llvm.fma.f32(float %1217, float -5.000000e-01, float 5.000000e-01) > %1220 = bitcast float %1218 to i32 > %1221 = bitcast float %1219 to i32 > %1222 = insertelement <2 x i32> undef, i32 %1220, i32 0 > %1223 = insertelement <2 x i32> %1222, i32 %1221, i32 1 > %1224 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1223, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1225 = extractelement <4 x float> %1224, i32 0 > %1226 = fsub float %1184, %1225 > %1227 = fcmp olt float %1226, 0.000000e+00 > %.421 = select i1 %1227, float %381, float %380 > %1228 = fmul float %.421, 5.000000e-01 > %1229 = fcmp ogt float %1226, 0.000000e+00 > %1230 = and i1 %286, %1229 > %temp148.0 = select i1 %1230, float %1228, float %.421 > %1231 = call float 
@llvm.fabs.f32(float %1226) > %1232 = fsub float %1231, %temp148.0 > %1233 = fcmp une float %temp148.0, 0.000000e+00 > br i1 %1233, label %IF293, label %ELSE294 > >IF293: ; preds = %ENDIF283 > %1234 = fdiv float 1.000000e+00, %temp148.0 > %1235 = fmul float %1232, %1234 > br label %ENDIF292 > >ELSE294: ; preds = %ENDIF283 > %1236 = fcmp ogt float %1232, 0.000000e+00 > %1237 = select i1 %1236, float 1.000000e+00, float %1232 > %1238 = fcmp oge float %1237, 0.000000e+00 > %.op480 = fmul float %1237, 0x4600000000000000 > %1239 = select i1 %1238, float %.op480, float 0xC600000000000000 > br label %ENDIF292 > >ENDIF292: ; preds = %ELSE294, %IF293 > %temp144.2 = phi float [ %1235, %IF293 ], [ %1239, %ELSE294 ] > %1240 = call float @llvm.AMDGPU.clamp.(float %temp144.2, float 0.000000e+00, float 1.000000e+00) > %1241 = fadd float %1169, %1240 > %1242 = fmul float %196, 0xBFA99999A0000000 > %1243 = fmul float %195, 0x3FD851EB80000000 > %1244 = fadd float %1242, %1243 > %1245 = fmul float %196, -5.000000e-01 > %1246 = fmul float %195, -5.000000e-01 > %1247 = fadd float %1245, %1246 > %1248 = fmul float %195, 0x3FA99999A0000000 > %1249 = fmul float %196, 0x3FD851EB80000000 > %1250 = fadd float %1248, %1249 > %1251 = call float @llvm.fma.f32(float %207, float %1250, float %161) > %1252 = call float @llvm.fma.f32(float %209, float %1250, float %162) > %1253 = call float @llvm.fma.f32(float %211, float %1250, float %163) > %1254 = call float @llvm.fma.f32(float %179, float %1244, float %1251) > %1255 = call float @llvm.fma.f32(float %180, float %1244, float %1252) > %1256 = call float @llvm.fma.f32(float %181, float %1244, float %1253) > %1257 = call float @llvm.fma.f32(float %1254, float %528, float %237) > %1258 = call float @llvm.fma.f32(float %1255, float %528, float %238) > %1259 = call float @llvm.fma.f32(float %1256, float %528, float %226) > %1260 = fmul float %25, %1257 > %1261 = fmul float %26, %1258 > %1262 = fadd float %1260, %1261 > %1263 = fmul float %27, %1259 > %1264 = fadd float %1262, %1263 > %1265 = fadd float %1264, %28 > %1266 = fmul float %29, %1257 > %1267 = fmul float %30, %1258 > %1268 = fadd float %1266, %1267 > %1269 = fmul float %31, %1259 > %1270 = fadd float %1268, %1269 > %1271 = fadd float %1270, %32 > %1272 = fmul float %33, %1257 > %1273 = fmul float %34, %1258 > %1274 = fadd float %1272, %1273 > %1275 = fmul float %35, %1259 > %1276 = fadd float %1274, %1275 > %1277 = fadd float %1276, %36 > %1278 = fcmp oeq float %1277, 0.000000e+00 > %1279 = fcmp oeq float %1277, 0.000000e+00 > %1280 = fcmp ogt float %1265, 0.000000e+00 > %1281 = select i1 %1280, float 1.000000e+00, float %1265 > %1282 = fcmp oge float %1281, 0.000000e+00 > %1283 = fcmp ogt float %1271, 0.000000e+00 > %1284 = select i1 %1283, float 1.000000e+00, float %1271 > %1285 = fcmp oge float %1284, 0.000000e+00 > %.op481 = fmul float %1281, 0x4600000000000000 > %1286 = select i1 %1282, float %.op481, float 0xC600000000000000 > %.op482 = fmul float %1284, 0x4600000000000000 > %1287 = select i1 %1285, float %.op482, float 0xC600000000000000 > %1288 = fdiv float 1.000000e+00, %1277 > %1289 = fmul float %1265, %1288 > %1290 = fmul float %1271, %1288 > %1291 = select i1 %1278, float %1286, float %1289 > %1292 = select i1 %1279, float %1287, float %1290 > %1293 = call float @llvm.fma.f32(float %1291, float 5.000000e-01, float 5.000000e-01) > %1294 = call float @llvm.fma.f32(float %1292, float -5.000000e-01, float 5.000000e-01) > %1295 = bitcast float %1293 to i32 > %1296 = bitcast float %1294 to i32 > %1297 = 
insertelement <2 x i32> undef, i32 %1295, i32 0 > %1298 = insertelement <2 x i32> %1297, i32 %1296, i32 1 > %1299 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1298, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1300 = extractelement <4 x float> %1299, i32 0 > %1301 = fsub float %1259, %1300 > %1302 = fcmp olt float %1301, 0.000000e+00 > %.422 = select i1 %1302, float %529, float %528 > %1303 = fmul float %.422, 5.000000e-01 > %1304 = fcmp ogt float %1301, 0.000000e+00 > %1305 = and i1 %286, %1304 > %temp156.0 = select i1 %1305, float %1303, float %.422 > %1306 = call float @llvm.fabs.f32(float %1301) > %1307 = fsub float %1306, %temp156.0 > %1308 = fcmp une float %temp156.0, 0.000000e+00 > br i1 %1308, label %IF302, label %ELSE303 > >IF302: ; preds = %ENDIF292 > %1309 = fdiv float 1.000000e+00, %temp156.0 > %1310 = fmul float %1307, %1309 > br label %ENDIF301 > >ELSE303: ; preds = %ENDIF292 > %1311 = fcmp ogt float %1307, 0.000000e+00 > %1312 = select i1 %1311, float 1.000000e+00, float %1307 > %1313 = fcmp oge float %1312, 0.000000e+00 > %.op483 = fmul float %1312, 0x4600000000000000 > %1314 = select i1 %1313, float %.op483, float 0xC600000000000000 > br label %ENDIF301 > >ENDIF301: ; preds = %ELSE303, %IF302 > %temp152.1 = phi float [ %1310, %IF302 ], [ %1314, %ELSE303 ] > %1315 = call float @llvm.AMDGPU.clamp.(float %temp152.1, float 0.000000e+00, float 1.000000e+00) > %1316 = fadd float %1241, %1315 > %1317 = fmul float %195, 5.000000e-01 > %1318 = fmul float %196, -5.000000e-01 > %1319 = fadd float %1317, %1318 > %1320 = call float @llvm.fma.f32(float %207, float %1319, float %161) > %1321 = call float @llvm.fma.f32(float %209, float %1319, float %162) > %1322 = call float @llvm.fma.f32(float %211, float %1319, float %163) > %1323 = call float @llvm.fma.f32(float %179, float %1247, float %1320) > %1324 = call float @llvm.fma.f32(float %180, float %1247, float %1321) > %1325 = call float @llvm.fma.f32(float %181, float %1247, float %1322) > %1326 = call float @llvm.fma.f32(float %1323, float %530, float %237) > %1327 = call float @llvm.fma.f32(float %1324, float %530, float %238) > %1328 = call float @llvm.fma.f32(float %1325, float %530, float %226) > %1329 = fmul float %25, %1326 > %1330 = fmul float %26, %1327 > %1331 = fadd float %1329, %1330 > %1332 = fmul float %27, %1328 > %1333 = fadd float %1331, %1332 > %1334 = fadd float %1333, %28 > %1335 = fmul float %29, %1326 > %1336 = fmul float %30, %1327 > %1337 = fadd float %1335, %1336 > %1338 = fmul float %31, %1328 > %1339 = fadd float %1337, %1338 > %1340 = fadd float %1339, %32 > %1341 = fmul float %33, %1326 > %1342 = fmul float %34, %1327 > %1343 = fadd float %1341, %1342 > %1344 = fmul float %35, %1328 > %1345 = fadd float %1343, %1344 > %1346 = fadd float %1345, %36 > %1347 = fcmp oeq float %1346, 0.000000e+00 > %1348 = fcmp oeq float %1346, 0.000000e+00 > %1349 = fcmp ogt float %1334, 0.000000e+00 > %1350 = select i1 %1349, float 1.000000e+00, float %1334 > %1351 = fcmp oge float %1350, 0.000000e+00 > %1352 = fcmp ogt float %1340, 0.000000e+00 > %1353 = select i1 %1352, float 1.000000e+00, float %1340 > %1354 = fcmp oge float %1353, 0.000000e+00 > %.op484 = fmul float %1350, 0x4600000000000000 > %1355 = select i1 %1351, float %.op484, float 0xC600000000000000 > %.op485 = fmul float %1353, 0x4600000000000000 > %1356 = select i1 %1354, float %.op485, float 0xC600000000000000 > %1357 = fdiv float 1.000000e+00, %1346 > %1358 = fmul float %1334, %1357 > %1359 = fmul float 
%1340, %1357 > %1360 = select i1 %1347, float %1355, float %1358 > %1361 = select i1 %1348, float %1356, float %1359 > %1362 = call float @llvm.fma.f32(float %1360, float 5.000000e-01, float 5.000000e-01) > %1363 = call float @llvm.fma.f32(float %1361, float -5.000000e-01, float 5.000000e-01) > %1364 = bitcast float %1362 to i32 > %1365 = bitcast float %1363 to i32 > %1366 = insertelement <2 x i32> undef, i32 %1364, i32 0 > %1367 = insertelement <2 x i32> %1366, i32 %1365, i32 1 > %1368 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1367, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1369 = extractelement <4 x float> %1368, i32 0 > %1370 = fsub float %1328, %1369 > %1371 = fcmp ogt float %1370, 0.000000e+00 > %1372 = and i1 %286, %1371 > %1373 = fcmp olt float %1370, 0.000000e+00 > %.423 = select i1 %1373, float %531, float %530 > %1374 = fmul float %.423, 5.000000e-01 > %temp56.0 = select i1 %1372, float %1374, float %.423 > %1375 = call float @llvm.fabs.f32(float %1370) > %1376 = fsub float %1375, %temp56.0 > %1377 = fcmp une float %temp56.0, 0.000000e+00 > br i1 %1377, label %IF311, label %ELSE312 > >IF311: ; preds = %ENDIF301 > %1378 = fdiv float 1.000000e+00, %temp56.0 > %1379 = fmul float %1376, %1378 > br label %ENDIF310 > >ELSE312: ; preds = %ENDIF301 > %1380 = fcmp ogt float %1376, 0.000000e+00 > %1381 = select i1 %1380, float 1.000000e+00, float %1376 > %1382 = fcmp oge float %1381, 0.000000e+00 > %.op486 = fmul float %1381, 0x4600000000000000 > %1383 = select i1 %1382, float %.op486, float 0xC600000000000000 > br label %ENDIF310 > >ENDIF310: ; preds = %ELSE312, %IF311 > %temp56.1 = phi float [ %1379, %IF311 ], [ %1383, %ELSE312 ] > %1384 = call float @llvm.AMDGPU.clamp.(float %temp56.1, float 0.000000e+00, float 1.000000e+00) > %1385 = fadd float %1384, %1316 > %1386 = fmul float %1385, 6.250000e-02 > %1387 = fmul float %1386, %1386 > %1388 = fmul float %195, 0xBFB99999A0000000 > %1389 = fmul float %196, 0x3FB99999A0000000 > %1390 = fadd float %1388, %1389 > %1391 = fmul float %139, %180 > %1392 = fmul float %137, %181 > %1393 = fmul float %138, %179 > %1394 = fsub float -0.000000e+00, %1391 > %1395 = call float @llvm.fma.f32(float %138, float %181, float %1394) > %1396 = fsub float -0.000000e+00, %1392 > %1397 = call float @llvm.fma.f32(float %139, float %179, float %1396) > %1398 = fsub float -0.000000e+00, %1393 > %1399 = call float @llvm.fma.f32(float %137, float %180, float %1398) > %1400 = call float @llvm.fma.f32(float %1395, float %1390, float %137) > %1401 = call float @llvm.fma.f32(float %1397, float %1390, float %138) > %1402 = call float @llvm.fma.f32(float %1399, float %1390, float %139) > %1403 = call float @llvm.fma.f32(float %179, float %217, float %1400) > %1404 = call float @llvm.fma.f32(float %180, float %217, float %1401) > %1405 = call float @llvm.fma.f32(float %181, float %217, float %1402) > %1406 = fmul float %230, 5.000000e-01 > %1407 = fmul float %230, 2.500000e-01 > %1408 = fmul float %230, 0x3FF4CCCCC0000000 > %1409 = fmul float %230, 0x3FE4CCCCC0000000 > %1410 = call float @llvm.fma.f32(float %1403, float %1406, float %237) > %1411 = call float @llvm.fma.f32(float %1404, float %1406, float %238) > %1412 = call float @llvm.fma.f32(float %1405, float %1406, float %226) > %1413 = fmul float %25, %1410 > %1414 = fmul float %26, %1411 > %1415 = fadd float %1413, %1414 > %1416 = fmul float %27, %1412 > %1417 = fadd float %1415, %1416 > %1418 = fadd float %1417, %28 > %1419 = fmul float %29, %1410 > 
%1420 = fmul float %30, %1411 > %1421 = fadd float %1419, %1420 > %1422 = fmul float %31, %1412 > %1423 = fadd float %1421, %1422 > %1424 = fadd float %1423, %32 > %1425 = fmul float %33, %1410 > %1426 = fmul float %34, %1411 > %1427 = fadd float %1425, %1426 > %1428 = fmul float %35, %1412 > %1429 = fadd float %1427, %1428 > %1430 = fadd float %1429, %36 > %1431 = fcmp oeq float %1430, 0.000000e+00 > %1432 = fcmp oeq float %1430, 0.000000e+00 > %1433 = fcmp ogt float %1418, 0.000000e+00 > %1434 = select i1 %1433, float 1.000000e+00, float %1418 > %1435 = fcmp oge float %1434, 0.000000e+00 > %1436 = fcmp ogt float %1424, 0.000000e+00 > %1437 = select i1 %1436, float 1.000000e+00, float %1424 > %1438 = fcmp oge float %1437, 0.000000e+00 > %.op487 = fmul float %1434, 0x4600000000000000 > %1439 = select i1 %1435, float %.op487, float 0xC600000000000000 > %.op488 = fmul float %1437, 0x4600000000000000 > %1440 = select i1 %1438, float %.op488, float 0xC600000000000000 > %1441 = fdiv float 1.000000e+00, %1430 > %1442 = fmul float %1418, %1441 > %1443 = fmul float %1424, %1441 > %1444 = select i1 %1431, float %1439, float %1442 > %1445 = select i1 %1432, float %1440, float %1443 > %1446 = call float @llvm.fma.f32(float %1444, float 5.000000e-01, float 5.000000e-01) > %1447 = call float @llvm.fma.f32(float %1445, float -5.000000e-01, float 5.000000e-01) > %1448 = bitcast float %1446 to i32 > %1449 = bitcast float %1447 to i32 > %1450 = insertelement <2 x i32> undef, i32 %1448, i32 0 > %1451 = insertelement <2 x i32> %1450, i32 %1449, i32 1 > %1452 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1451, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1453 = extractelement <4 x float> %1452, i32 0 > %1454 = fsub float %1412, %1453 > %1455 = fcmp olt float %1454, 0.000000e+00 > %.424 = select i1 %1455, float %1407, float %1406 > %1456 = call float @llvm.fabs.f32(float %1454) > %1457 = fsub float %1456, %.424 > %1458 = fcmp une float %.424, 0.000000e+00 > br i1 %1458, label %IF317, label %ELSE318 > >IF317: ; preds = %ENDIF310 > %1459 = fdiv float 1.000000e+00, %.424 > %1460 = fmul float %1457, %1459 > br label %ENDIF316 > >ELSE318: ; preds = %ENDIF310 > %1461 = fcmp ogt float %1457, 0.000000e+00 > %1462 = select i1 %1461, float 1.000000e+00, float %1457 > %1463 = fcmp oge float %1462, 0.000000e+00 > %.op489 = fmul float %1462, 0x4600000000000000 > %1464 = select i1 %1463, float %.op489, float 0xC600000000000000 > br label %ENDIF316 > >ENDIF316: ; preds = %ELSE318, %IF317 > %temp56.3 = phi float [ %1460, %IF317 ], [ %1464, %ELSE318 ] > %1465 = call float @llvm.AMDGPU.clamp.(float %temp56.3, float 0.000000e+00, float 1.000000e+00) > %1466 = call float @llvm.fma.f32(float %1395, float %300, float %137) > %1467 = call float @llvm.fma.f32(float %1397, float %300, float %138) > %1468 = call float @llvm.fma.f32(float %1399, float %300, float %139) > %1469 = call float @llvm.fma.f32(float %1395, float %302, float %137) > %1470 = call float @llvm.fma.f32(float %1397, float %302, float %138) > %1471 = call float @llvm.fma.f32(float %1399, float %302, float %139) > %1472 = call float @llvm.fma.f32(float %179, float %446, float %1469) > %1473 = call float @llvm.fma.f32(float %180, float %446, float %1470) > %1474 = call float @llvm.fma.f32(float %181, float %446, float %1471) > %1475 = call float @llvm.fma.f32(float %1472, float %1408, float %237) > %1476 = call float @llvm.fma.f32(float %1473, float %1408, float %238) > %1477 = call float @llvm.fma.f32(float 
%1474, float %1408, float %226) > %1478 = call float @llvm.fma.f32(float %179, float %445, float %1466) > %1479 = call float @llvm.fma.f32(float %180, float %445, float %1467) > %1480 = call float @llvm.fma.f32(float %181, float %445, float %1468) > %1481 = fmul float %195, 0x3FD147AE20000000 > %1482 = fmul float %196, 0x3F847AE140000000 > %1483 = fadd float %1481, %1482 > %1484 = call float @llvm.fma.f32(float %1395, float %1483, float %137) > %1485 = call float @llvm.fma.f32(float %1397, float %1483, float %138) > %1486 = call float @llvm.fma.f32(float %1399, float %1483, float %139) > %1487 = call float @llvm.fma.f32(float %179, float %371, float %1484) > %1488 = call float @llvm.fma.f32(float %180, float %371, float %1485) > %1489 = call float @llvm.fma.f32(float %181, float %371, float %1486) > %1490 = call float @llvm.fma.f32(float %1478, float %1408, float %237) > %1491 = call float @llvm.fma.f32(float %1479, float %1408, float %238) > %1492 = call float @llvm.fma.f32(float %1480, float %1408, float %226) > %1493 = fmul float %25, %1490 > %1494 = fmul float %26, %1491 > %1495 = fadd float %1493, %1494 > %1496 = fmul float %27, %1492 > %1497 = fadd float %1495, %1496 > %1498 = fadd float %1497, %28 > %1499 = fmul float %29, %1490 > %1500 = fmul float %30, %1491 > %1501 = fadd float %1499, %1500 > %1502 = fmul float %31, %1492 > %1503 = fadd float %1501, %1502 > %1504 = fadd float %1503, %32 > %1505 = fmul float %33, %1490 > %1506 = fmul float %34, %1491 > %1507 = fadd float %1505, %1506 > %1508 = fmul float %35, %1492 > %1509 = fadd float %1507, %1508 > %1510 = fadd float %1509, %36 > %1511 = fcmp oeq float %1510, 0.000000e+00 > %1512 = fcmp oeq float %1510, 0.000000e+00 > %1513 = fcmp ogt float %1498, 0.000000e+00 > %1514 = select i1 %1513, float 1.000000e+00, float %1498 > %1515 = fcmp oge float %1514, 0.000000e+00 > %1516 = fcmp ogt float %1504, 0.000000e+00 > %1517 = select i1 %1516, float 1.000000e+00, float %1504 > %1518 = fcmp oge float %1517, 0.000000e+00 > %.op490 = fmul float %1514, 0x4600000000000000 > %1519 = select i1 %1515, float %.op490, float 0xC600000000000000 > %.op491 = fmul float %1517, 0x4600000000000000 > %1520 = select i1 %1518, float %.op491, float 0xC600000000000000 > %1521 = fdiv float 1.000000e+00, %1510 > %1522 = fmul float %1498, %1521 > %1523 = fmul float %1504, %1521 > %1524 = select i1 %1511, float %1519, float %1522 > %1525 = select i1 %1512, float %1520, float %1523 > %1526 = call float @llvm.fma.f32(float %1524, float 5.000000e-01, float 5.000000e-01) > %1527 = call float @llvm.fma.f32(float %1525, float -5.000000e-01, float 5.000000e-01) > %1528 = bitcast float %1526 to i32 > %1529 = bitcast float %1527 to i32 > %1530 = insertelement <2 x i32> undef, i32 %1528, i32 0 > %1531 = insertelement <2 x i32> %1530, i32 %1529, i32 1 > %1532 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1531, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1533 = extractelement <4 x float> %1532, i32 0 > %1534 = fsub float %1492, %1533 > %1535 = fcmp olt float %1534, 0.000000e+00 > %.425 = select i1 %1535, float %1409, float %1408 > %1536 = call float @llvm.fabs.f32(float %1534) > %1537 = fsub float %1536, %.425 > %1538 = fcmp une float %.425, 0.000000e+00 > br i1 %1538, label %IF323, label %ELSE324 > >IF323: ; preds = %ENDIF316 > %1539 = fdiv float 1.000000e+00, %.425 > %1540 = fmul float %1537, %1539 > br label %ENDIF322 > >ELSE324: ; preds = %ENDIF316 > %1541 = fcmp ogt float %1537, 0.000000e+00 > %1542 = select i1 
%1541, float 1.000000e+00, float %1537 > %1543 = fcmp oge float %1542, 0.000000e+00 > %.op492 = fmul float %1542, 0x4600000000000000 > %1544 = select i1 %1543, float %.op492, float 0xC600000000000000 > br label %ENDIF322 > >ENDIF322: ; preds = %ELSE324, %IF323 > %temp156.3 = phi float [ %1540, %IF323 ], [ %1544, %ELSE324 ] > %1545 = call float @llvm.AMDGPU.clamp.(float %temp156.3, float 0.000000e+00, float 1.000000e+00) > %1546 = fadd float %1545, %1465 > %1547 = fmul float %230, 0x4002666660000000 > %1548 = fmul float %230, 0x3FF2666660000000 > %1549 = fmul float %230, 0x40099999A0000000 > %1550 = fmul float %230, 0x3FF99999A0000000 > %1551 = call float @llvm.fma.f32(float %1487, float %1547, float %237) > %1552 = call float @llvm.fma.f32(float %1488, float %1547, float %238) > %1553 = call float @llvm.fma.f32(float %1489, float %1547, float %226) > %1554 = fmul float %25, %1551 > %1555 = fmul float %26, %1552 > %1556 = fadd float %1554, %1555 > %1557 = fmul float %27, %1553 > %1558 = fadd float %1556, %1557 > %1559 = fadd float %1558, %28 > %1560 = fmul float %29, %1551 > %1561 = fmul float %30, %1552 > %1562 = fadd float %1560, %1561 > %1563 = fmul float %31, %1553 > %1564 = fadd float %1562, %1563 > %1565 = fadd float %1564, %32 > %1566 = fmul float %33, %1551 > %1567 = fmul float %34, %1552 > %1568 = fadd float %1566, %1567 > %1569 = fmul float %35, %1553 > %1570 = fadd float %1568, %1569 > %1571 = fadd float %1570, %36 > %1572 = fcmp oeq float %1571, 0.000000e+00 > %1573 = fcmp oeq float %1571, 0.000000e+00 > %1574 = fcmp ogt float %1559, 0.000000e+00 > %1575 = select i1 %1574, float 1.000000e+00, float %1559 > %1576 = fcmp oge float %1575, 0.000000e+00 > %1577 = fcmp ogt float %1565, 0.000000e+00 > %1578 = select i1 %1577, float 1.000000e+00, float %1565 > %1579 = fcmp oge float %1578, 0.000000e+00 > %.op493 = fmul float %1575, 0x4600000000000000 > %1580 = select i1 %1576, float %.op493, float 0xC600000000000000 > %.op494 = fmul float %1578, 0x4600000000000000 > %1581 = select i1 %1579, float %.op494, float 0xC600000000000000 > %1582 = fdiv float 1.000000e+00, %1571 > %1583 = fmul float %1559, %1582 > %1584 = fmul float %1565, %1582 > %1585 = select i1 %1572, float %1580, float %1583 > %1586 = select i1 %1573, float %1581, float %1584 > %1587 = call float @llvm.fma.f32(float %1585, float 5.000000e-01, float 5.000000e-01) > %1588 = call float @llvm.fma.f32(float %1586, float -5.000000e-01, float 5.000000e-01) > %1589 = bitcast float %1587 to i32 > %1590 = bitcast float %1588 to i32 > %1591 = insertelement <2 x i32> undef, i32 %1589, i32 0 > %1592 = insertelement <2 x i32> %1591, i32 %1590, i32 1 > %1593 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1592, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1594 = extractelement <4 x float> %1593, i32 0 > %1595 = fsub float %1553, %1594 > %1596 = fcmp olt float %1595, 0.000000e+00 > %.426 = select i1 %1596, float %1548, float %1547 > %1597 = call float @llvm.fabs.f32(float %1595) > %1598 = fsub float %1597, %.426 > %1599 = fcmp une float %.426, 0.000000e+00 > br i1 %1599, label %IF329, label %ELSE330 > >IF329: ; preds = %ENDIF322 > %1600 = fdiv float 1.000000e+00, %.426 > %1601 = fmul float %1598, %1600 > br label %ENDIF328 > >ELSE330: ; preds = %ENDIF322 > %1602 = fcmp ogt float %1598, 0.000000e+00 > %1603 = select i1 %1602, float 1.000000e+00, float %1598 > %1604 = fcmp oge float %1603, 0.000000e+00 > %.op495 = fmul float %1603, 0x4600000000000000 > %1605 = select i1 %1604, float 
%.op495, float 0xC600000000000000 > br label %ENDIF328 > >ENDIF328: ; preds = %ELSE330, %IF329 > %temp156.5 = phi float [ %1601, %IF329 ], [ %1605, %ELSE330 ] > %1606 = call float @llvm.AMDGPU.clamp.(float %temp156.5, float 0.000000e+00, float 1.000000e+00) > %1607 = fadd float %1606, %1546 > %1608 = call float @llvm.fma.f32(float %1395, float %449, float %137) > %1609 = call float @llvm.fma.f32(float %1397, float %449, float %138) > %1610 = call float @llvm.fma.f32(float %1399, float %449, float %139) > %1611 = call float @llvm.fma.f32(float %179, float %444, float %1608) > %1612 = call float @llvm.fma.f32(float %180, float %444, float %1609) > %1613 = call float @llvm.fma.f32(float %181, float %444, float %1610) > %1614 = call float @llvm.fma.f32(float %1611, float %1549, float %237) > %1615 = call float @llvm.fma.f32(float %1612, float %1549, float %238) > %1616 = call float @llvm.fma.f32(float %1613, float %1549, float %226) > %1617 = fmul float %25, %1614 > %1618 = fmul float %26, %1615 > %1619 = fadd float %1617, %1618 > %1620 = fmul float %27, %1616 > %1621 = fadd float %1619, %1620 > %1622 = fadd float %1621, %28 > %1623 = fmul float %29, %1614 > %1624 = fmul float %30, %1615 > %1625 = fadd float %1623, %1624 > %1626 = fmul float %31, %1616 > %1627 = fadd float %1625, %1626 > %1628 = fadd float %1627, %32 > %1629 = fmul float %33, %1614 > %1630 = fmul float %34, %1615 > %1631 = fadd float %1629, %1630 > %1632 = fmul float %35, %1616 > %1633 = fadd float %1631, %1632 > %1634 = fadd float %1633, %36 > %1635 = fcmp oeq float %1634, 0.000000e+00 > %1636 = fcmp oeq float %1634, 0.000000e+00 > %1637 = fcmp ogt float %1622, 0.000000e+00 > %1638 = select i1 %1637, float 1.000000e+00, float %1622 > %1639 = fcmp oge float %1638, 0.000000e+00 > %1640 = fcmp ogt float %1628, 0.000000e+00 > %1641 = select i1 %1640, float 1.000000e+00, float %1628 > %1642 = fcmp oge float %1641, 0.000000e+00 > %.op496 = fmul float %1638, 0x4600000000000000 > %1643 = select i1 %1639, float %.op496, float 0xC600000000000000 > %.op497 = fmul float %1641, 0x4600000000000000 > %1644 = select i1 %1642, float %.op497, float 0xC600000000000000 > %1645 = fdiv float 1.000000e+00, %1634 > %1646 = fmul float %1622, %1645 > %1647 = fmul float %1628, %1645 > %1648 = select i1 %1635, float %1643, float %1646 > %1649 = select i1 %1636, float %1644, float %1647 > %1650 = call float @llvm.fma.f32(float %1648, float 5.000000e-01, float 5.000000e-01) > %1651 = call float @llvm.fma.f32(float %1649, float -5.000000e-01, float 5.000000e-01) > %1652 = bitcast float %1650 to i32 > %1653 = bitcast float %1651 to i32 > %1654 = insertelement <2 x i32> undef, i32 %1652, i32 0 > %1655 = insertelement <2 x i32> %1654, i32 %1653, i32 1 > %1656 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1655, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1657 = extractelement <4 x float> %1656, i32 0 > %1658 = fsub float %1616, %1657 > %1659 = fcmp olt float %1658, 0.000000e+00 > %.427 = select i1 %1659, float %1550, float %1549 > %1660 = call float @llvm.fabs.f32(float %1658) > %1661 = fsub float %1660, %.427 > %1662 = fcmp une float %.427, 0.000000e+00 > br i1 %1662, label %IF335, label %ELSE336 > >IF335: ; preds = %ENDIF328 > %1663 = fdiv float 1.000000e+00, %.427 > %1664 = fmul float %1661, %1663 > br label %ENDIF334 > >ELSE336: ; preds = %ENDIF328 > %1665 = fcmp ogt float %1661, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1661 > %1667 = fcmp oge float %1666, 0.000000e+00 > 
%.op498 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op498, float 0xC600000000000000 > br label %ENDIF334 > >ENDIF334: ; preds = %ELSE336, %IF335 > %temp64.1 = phi float [ %1664, %IF335 ], [ %1668, %ELSE336 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp64.1, float 0.000000e+00, float 1.000000e+00) > %1670 = fadd float %1669, %1607 > %1671 = call float @llvm.fma.f32(float %1395, float %521, float %137) > %1672 = call float @llvm.fma.f32(float %1397, float %521, float %138) > %1673 = call float @llvm.fma.f32(float %1399, float %521, float %139) > %1674 = call float @llvm.fma.f32(float %179, float %518, float %1671) > %1675 = call float @llvm.fma.f32(float %180, float %518, float %1672) > %1676 = call float @llvm.fma.f32(float %181, float %518, float %1673) > %1677 = fmul float %230, 0x4010666660000000 > %1678 = fmul float %230, 0x4000666660000000 > %1679 = fmul float %230, 0x4015333340000000 > %1680 = fmul float %230, 0x4005333340000000 > %1681 = call float @llvm.fma.f32(float %1674, float %1677, float %237) > %1682 = call float @llvm.fma.f32(float %1675, float %1677, float %238) > %1683 = call float @llvm.fma.f32(float %1676, float %1677, float %226) > %1684 = fmul float %25, %1681 > %1685 = fmul float %26, %1682 > %1686 = fadd float %1684, %1685 > %1687 = fmul float %27, %1683 > %1688 = fadd float %1686, %1687 > %1689 = fadd float %1688, %28 > %1690 = fmul float %29, %1681 > %1691 = fmul float %30, %1682 > %1692 = fadd float %1690, %1691 > %1693 = fmul float %31, %1683 > %1694 = fadd float %1692, %1693 > %1695 = fadd float %1694, %32 > %1696 = fmul float %33, %1681 > %1697 = fmul float %34, %1682 > %1698 = fadd float %1696, %1697 > %1699 = fmul float %35, %1683 > %1700 = fadd float %1698, %1699 > %1701 = fadd float %1700, %36 > %1702 = fcmp oeq float %1701, 0.000000e+00 > %1703 = fcmp oeq float %1701, 0.000000e+00 > %1704 = fcmp ogt float %1689, 0.000000e+00 > %1705 = select i1 %1704, float 1.000000e+00, float %1689 > %1706 = fcmp oge float %1705, 0.000000e+00 > %1707 = fcmp ogt float %1695, 0.000000e+00 > %1708 = select i1 %1707, float 1.000000e+00, float %1695 > %1709 = fcmp oge float %1708, 0.000000e+00 > %.op499 = fmul float %1705, 0x4600000000000000 > %1710 = select i1 %1706, float %.op499, float 0xC600000000000000 > %.op500 = fmul float %1708, 0x4600000000000000 > %1711 = select i1 %1709, float %.op500, float 0xC600000000000000 > %1712 = fdiv float 1.000000e+00, %1701 > %1713 = fmul float %1689, %1712 > %1714 = fmul float %1695, %1712 > %1715 = select i1 %1702, float %1710, float %1713 > %1716 = select i1 %1703, float %1711, float %1714 > %1717 = call float @llvm.fma.f32(float %1715, float 5.000000e-01, float 5.000000e-01) > %1718 = call float @llvm.fma.f32(float %1716, float -5.000000e-01, float 5.000000e-01) > %1719 = bitcast float %1717 to i32 > %1720 = bitcast float %1718 to i32 > %1721 = insertelement <2 x i32> undef, i32 %1719, i32 0 > %1722 = insertelement <2 x i32> %1721, i32 %1720, i32 1 > %1723 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1722, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1724 = extractelement <4 x float> %1723, i32 0 > %1725 = fsub float %1683, %1724 > %1726 = fcmp olt float %1725, 0.000000e+00 > %.428 = select i1 %1726, float %1678, float %1677 > %1727 = call float @llvm.fabs.f32(float %1725) > %1728 = fsub float %1727, %.428 > %1729 = fcmp une float %.428, 0.000000e+00 > br i1 %1729, label %IF341, label %ELSE342 > >IF341: ; preds = %ENDIF334 > %1730 = fdiv 
float 1.000000e+00, %.428 > %1731 = fmul float %1728, %1730 > br label %ENDIF340 > >ELSE342: ; preds = %ENDIF334 > %1732 = fcmp ogt float %1728, 0.000000e+00 > %1733 = select i1 %1732, float 1.000000e+00, float %1728 > %1734 = fcmp oge float %1733, 0.000000e+00 > %.op501 = fmul float %1733, 0x4600000000000000 > %1735 = select i1 %1734, float %.op501, float 0xC600000000000000 > br label %ENDIF340 > >ENDIF340: ; preds = %ELSE342, %IF341 > %temp64.3 = phi float [ %1731, %IF341 ], [ %1735, %ELSE342 ] > %1736 = call float @llvm.AMDGPU.clamp.(float %temp64.3, float 0.000000e+00, float 1.000000e+00) > %1737 = fadd float %1736, %1670 > %1738 = call float @llvm.fma.f32(float %1395, float %597, float %137) > %1739 = call float @llvm.fma.f32(float %1397, float %597, float %138) > %1740 = call float @llvm.fma.f32(float %1399, float %597, float %139) > %1741 = call float @llvm.fma.f32(float %179, float %594, float %1738) > %1742 = call float @llvm.fma.f32(float %180, float %594, float %1739) > %1743 = call float @llvm.fma.f32(float %181, float %594, float %1740) > %1744 = call float @llvm.fma.f32(float %1741, float %1679, float %237) > %1745 = call float @llvm.fma.f32(float %1742, float %1679, float %238) > %1746 = call float @llvm.fma.f32(float %1743, float %1679, float %226) > %1747 = fmul float %25, %1744 > %1748 = fmul float %26, %1745 > %1749 = fadd float %1747, %1748 > %1750 = fmul float %27, %1746 > %1751 = fadd float %1749, %1750 > %1752 = fadd float %1751, %28 > %1753 = fmul float %29, %1744 > %1754 = fmul float %30, %1745 > %1755 = fadd float %1753, %1754 > %1756 = fmul float %31, %1746 > %1757 = fadd float %1755, %1756 > %1758 = fadd float %1757, %32 > %1759 = fmul float %33, %1744 > %1760 = fmul float %34, %1745 > %1761 = fadd float %1759, %1760 > %1762 = fmul float %35, %1746 > %1763 = fadd float %1761, %1762 > %1764 = fadd float %1763, %36 > %1765 = fcmp oeq float %1764, 0.000000e+00 > %1766 = fcmp oeq float %1764, 0.000000e+00 > %1767 = fcmp ogt float %1752, 0.000000e+00 > %1768 = select i1 %1767, float 1.000000e+00, float %1752 > %1769 = fcmp oge float %1768, 0.000000e+00 > %1770 = fcmp ogt float %1758, 0.000000e+00 > %1771 = select i1 %1770, float 1.000000e+00, float %1758 > %1772 = fcmp oge float %1771, 0.000000e+00 > %.op502 = fmul float %1768, 0x4600000000000000 > %1773 = select i1 %1769, float %.op502, float 0xC600000000000000 > %.op503 = fmul float %1771, 0x4600000000000000 > %1774 = select i1 %1772, float %.op503, float 0xC600000000000000 > %1775 = fdiv float 1.000000e+00, %1764 > %1776 = fmul float %1752, %1775 > %1777 = fmul float %1758, %1775 > %1778 = select i1 %1765, float %1773, float %1776 > %1779 = select i1 %1766, float %1774, float %1777 > %1780 = call float @llvm.fma.f32(float %1778, float 5.000000e-01, float 5.000000e-01) > %1781 = call float @llvm.fma.f32(float %1779, float -5.000000e-01, float 5.000000e-01) > %1782 = bitcast float %1780 to i32 > %1783 = bitcast float %1781 to i32 > %1784 = insertelement <2 x i32> undef, i32 %1782, i32 0 > %1785 = insertelement <2 x i32> %1784, i32 %1783, i32 1 > %1786 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1785, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1787 = extractelement <4 x float> %1786, i32 0 > %1788 = fsub float %1746, %1787 > %1789 = fcmp olt float %1788, 0.000000e+00 > %.429 = select i1 %1789, float %1680, float %1679 > %1790 = call float @llvm.fabs.f32(float %1788) > %1791 = fsub float %1790, %.429 > %1792 = fcmp une float %.429, 0.000000e+00 > br i1 
%1792, label %IF347, label %ELSE348 > >IF347: ; preds = %ENDIF340 > %1793 = fdiv float 1.000000e+00, %.429 > %1794 = fmul float %1791, %1793 > br label %ENDIF346 > >ELSE348: ; preds = %ENDIF340 > %1795 = fcmp ogt float %1791, 0.000000e+00 > %1796 = select i1 %1795, float 1.000000e+00, float %1791 > %1797 = fcmp oge float %1796, 0.000000e+00 > %.op504 = fmul float %1796, 0x4600000000000000 > %1798 = select i1 %1797, float %.op504, float 0xC600000000000000 > br label %ENDIF346 > >ENDIF346: ; preds = %ELSE348, %IF347 > %temp64.5 = phi float [ %1794, %IF347 ], [ %1798, %ELSE348 ] > %1799 = call float @llvm.AMDGPU.clamp.(float %temp64.5, float 0.000000e+00, float 1.000000e+00) > %1800 = fadd float %1799, %1737 > %1801 = call float @llvm.fma.f32(float %1395, float %673, float %137) > %1802 = call float @llvm.fma.f32(float %1397, float %673, float %138) > %1803 = call float @llvm.fma.f32(float %1399, float %673, float %139) > %1804 = call float @llvm.fma.f32(float %179, float %670, float %1801) > %1805 = call float @llvm.fma.f32(float %180, float %670, float %1802) > %1806 = call float @llvm.fma.f32(float %181, float %670, float %1803) > %1807 = fmul float %230, 0x4018666660000000 > %1808 = fmul float %230, 0x4008666660000000 > %1809 = fmul float %230, 0x401ECCCCC0000000 > %1810 = fmul float %230, 0x400ECCCCC0000000 > %1811 = fmul float %230, 8.500000e+00 > %1812 = fmul float %230, 4.250000e+00 > %1813 = fmul float %230, 1.000000e+01 > %1814 = fmul float %230, 5.000000e+00 > %1815 = call float @llvm.fma.f32(float %1804, float %1807, float %237) > %1816 = call float @llvm.fma.f32(float %1805, float %1807, float %238) > %1817 = call float @llvm.fma.f32(float %1806, float %1807, float %226) > %1818 = fmul float %25, %1815 > %1819 = fmul float %26, %1816 > %1820 = fadd float %1818, %1819 > %1821 = fmul float %27, %1817 > %1822 = fadd float %1820, %1821 > %1823 = fadd float %1822, %28 > %1824 = fmul float %29, %1815 > %1825 = fmul float %30, %1816 > %1826 = fadd float %1824, %1825 > %1827 = fmul float %31, %1817 > %1828 = fadd float %1826, %1827 > %1829 = fadd float %1828, %32 > %1830 = fmul float %33, %1815 > %1831 = fmul float %34, %1816 > %1832 = fadd float %1830, %1831 > %1833 = fmul float %35, %1817 > %1834 = fadd float %1832, %1833 > %1835 = fadd float %1834, %36 > %1836 = fcmp oeq float %1835, 0.000000e+00 > %1837 = fcmp oeq float %1835, 0.000000e+00 > %1838 = fcmp ogt float %1823, 0.000000e+00 > %1839 = select i1 %1838, float 1.000000e+00, float %1823 > %1840 = fcmp oge float %1839, 0.000000e+00 > %1841 = fcmp ogt float %1829, 0.000000e+00 > %1842 = select i1 %1841, float 1.000000e+00, float %1829 > %1843 = fcmp oge float %1842, 0.000000e+00 > %.op505 = fmul float %1839, 0x4600000000000000 > %1844 = select i1 %1840, float %.op505, float 0xC600000000000000 > %.op506 = fmul float %1842, 0x4600000000000000 > %1845 = select i1 %1843, float %.op506, float 0xC600000000000000 > %1846 = fdiv float 1.000000e+00, %1835 > %1847 = fmul float %1823, %1846 > %1848 = fmul float %1829, %1846 > %1849 = select i1 %1836, float %1844, float %1847 > %1850 = select i1 %1837, float %1845, float %1848 > %1851 = call float @llvm.fma.f32(float %1849, float 5.000000e-01, float 5.000000e-01) > %1852 = call float @llvm.fma.f32(float %1850, float -5.000000e-01, float 5.000000e-01) > %1853 = bitcast float %1851 to i32 > %1854 = bitcast float %1852 to i32 > %1855 = insertelement <2 x i32> undef, i32 %1853, i32 0 > %1856 = insertelement <2 x i32> %1855, i32 %1854, i32 1 > %1857 = call <4 x float> 
@llvm.SI.image.sample.v2i32(<2 x i32> %1856, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1858 = extractelement <4 x float> %1857, i32 0 > %1859 = fsub float %1817, %1858 > %1860 = fcmp olt float %1859, 0.000000e+00 > %.430 = select i1 %1860, float %1808, float %1807 > %1861 = call float @llvm.fabs.f32(float %1859) > %1862 = fsub float %1861, %.430 > %1863 = fcmp une float %.430, 0.000000e+00 > br i1 %1863, label %IF353, label %ELSE354 > >IF353: ; preds = %ENDIF346 > %1864 = fdiv float 1.000000e+00, %.430 > %1865 = fmul float %1862, %1864 > br label %ENDIF352 > >ELSE354: ; preds = %ENDIF346 > %1866 = fcmp ogt float %1862, 0.000000e+00 > %1867 = select i1 %1866, float 1.000000e+00, float %1862 > %1868 = fcmp oge float %1867, 0.000000e+00 > %.op507 = fmul float %1867, 0x4600000000000000 > %1869 = select i1 %1868, float %.op507, float 0xC600000000000000 > br label %ENDIF352 > >ENDIF352: ; preds = %ELSE354, %IF353 > %temp48.3 = phi float [ %1865, %IF353 ], [ %1869, %ELSE354 ] > %1870 = call float @llvm.AMDGPU.clamp.(float %temp48.3, float 0.000000e+00, float 1.000000e+00) > %1871 = fadd float %1870, %1800 > %1872 = call float @llvm.fma.f32(float %1395, float %745, float %137) > %1873 = call float @llvm.fma.f32(float %1397, float %745, float %138) > %1874 = call float @llvm.fma.f32(float %1399, float %745, float %139) > %1875 = call float @llvm.fma.f32(float %179, float %742, float %1872) > %1876 = call float @llvm.fma.f32(float %180, float %742, float %1873) > %1877 = call float @llvm.fma.f32(float %181, float %742, float %1874) > %1878 = call float @llvm.fma.f32(float %1875, float %1809, float %237) > %1879 = call float @llvm.fma.f32(float %1876, float %1809, float %238) > %1880 = call float @llvm.fma.f32(float %1877, float %1809, float %226) > %1881 = fmul float %25, %1878 > %1882 = fmul float %26, %1879 > %1883 = fadd float %1881, %1882 > %1884 = fmul float %27, %1880 > %1885 = fadd float %1883, %1884 > %1886 = fadd float %1885, %28 > %1887 = fmul float %29, %1878 > %1888 = fmul float %30, %1879 > %1889 = fadd float %1887, %1888 > %1890 = fmul float %31, %1880 > %1891 = fadd float %1889, %1890 > %1892 = fadd float %1891, %32 > %1893 = fmul float %33, %1878 > %1894 = fmul float %34, %1879 > %1895 = fadd float %1893, %1894 > %1896 = fmul float %35, %1880 > %1897 = fadd float %1895, %1896 > %1898 = fadd float %1897, %36 > %1899 = fcmp oeq float %1898, 0.000000e+00 > %1900 = fcmp oeq float %1898, 0.000000e+00 > %1901 = fcmp ogt float %1886, 0.000000e+00 > %1902 = select i1 %1901, float 1.000000e+00, float %1886 > %1903 = fcmp oge float %1902, 0.000000e+00 > %1904 = fcmp ogt float %1892, 0.000000e+00 > %1905 = select i1 %1904, float 1.000000e+00, float %1892 > %1906 = fcmp oge float %1905, 0.000000e+00 > %.op508 = fmul float %1902, 0x4600000000000000 > %1907 = select i1 %1903, float %.op508, float 0xC600000000000000 > %.op509 = fmul float %1905, 0x4600000000000000 > %1908 = select i1 %1906, float %.op509, float 0xC600000000000000 > %1909 = fdiv float 1.000000e+00, %1898 > %1910 = fmul float %1886, %1909 > %1911 = fmul float %1892, %1909 > %1912 = select i1 %1899, float %1907, float %1910 > %1913 = select i1 %1900, float %1908, float %1911 > %1914 = call float @llvm.fma.f32(float %1912, float 5.000000e-01, float 5.000000e-01) > %1915 = call float @llvm.fma.f32(float %1913, float -5.000000e-01, float 5.000000e-01) > %1916 = bitcast float %1914 to i32 > %1917 = bitcast float %1915 to i32 > %1918 = insertelement <2 x i32> undef, i32 %1916, i32 0 > %1919 = 
insertelement <2 x i32> %1918, i32 %1917, i32 1 > %1920 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1919, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1921 = extractelement <4 x float> %1920, i32 0 > %1922 = fsub float %1880, %1921 > %1923 = fcmp olt float %1922, 0.000000e+00 > %.431 = select i1 %1923, float %1810, float %1809 > %1924 = call float @llvm.fabs.f32(float %1922) > %1925 = fsub float %1924, %.431 > %1926 = fcmp une float %.431, 0.000000e+00 > br i1 %1926, label %IF359, label %ELSE360 > >IF359: ; preds = %ENDIF352 > %1927 = fdiv float 1.000000e+00, %.431 > %1928 = fmul float %1925, %1927 > br label %ENDIF358 > >ELSE360: ; preds = %ENDIF352 > %1929 = fcmp ogt float %1925, 0.000000e+00 > %1930 = select i1 %1929, float 1.000000e+00, float %1925 > %1931 = fcmp oge float %1930, 0.000000e+00 > %.op510 = fmul float %1930, 0x4600000000000000 > %1932 = select i1 %1931, float %.op510, float 0xC600000000000000 > br label %ENDIF358 > >ENDIF358: ; preds = %ELSE360, %IF359 > %temp48.5 = phi float [ %1928, %IF359 ], [ %1932, %ELSE360 ] > %1933 = call float @llvm.AMDGPU.clamp.(float %temp48.5, float 0.000000e+00, float 1.000000e+00) > %1934 = fadd float %1933, %1871 > %1935 = call float @llvm.fma.f32(float %1395, float %821, float %137) > %1936 = call float @llvm.fma.f32(float %1397, float %821, float %138) > %1937 = call float @llvm.fma.f32(float %1399, float %821, float %139) > %1938 = call float @llvm.fma.f32(float %179, float %818, float %1935) > %1939 = call float @llvm.fma.f32(float %180, float %818, float %1936) > %1940 = call float @llvm.fma.f32(float %181, float %818, float %1937) > %1941 = call float @llvm.fma.f32(float %1938, float %1811, float %237) > %1942 = call float @llvm.fma.f32(float %1939, float %1811, float %238) > %1943 = call float @llvm.fma.f32(float %1940, float %1811, float %226) > %1944 = fmul float %25, %1941 > %1945 = fmul float %26, %1942 > %1946 = fadd float %1944, %1945 > %1947 = fmul float %27, %1943 > %1948 = fadd float %1946, %1947 > %1949 = fadd float %1948, %28 > %1950 = fmul float %29, %1941 > %1951 = fmul float %30, %1942 > %1952 = fadd float %1950, %1951 > %1953 = fmul float %31, %1943 > %1954 = fadd float %1952, %1953 > %1955 = fadd float %1954, %32 > %1956 = fmul float %33, %1941 > %1957 = fmul float %34, %1942 > %1958 = fadd float %1956, %1957 > %1959 = fmul float %35, %1943 > %1960 = fadd float %1958, %1959 > %1961 = fadd float %1960, %36 > %1962 = fcmp oeq float %1961, 0.000000e+00 > %1963 = fcmp oeq float %1961, 0.000000e+00 > %1964 = fcmp ogt float %1949, 0.000000e+00 > %1965 = select i1 %1964, float 1.000000e+00, float %1949 > %1966 = fcmp oge float %1965, 0.000000e+00 > %1967 = fcmp ogt float %1955, 0.000000e+00 > %1968 = select i1 %1967, float 1.000000e+00, float %1955 > %1969 = fcmp oge float %1968, 0.000000e+00 > %.op511 = fmul float %1965, 0x4600000000000000 > %1970 = select i1 %1966, float %.op511, float 0xC600000000000000 > %.op512 = fmul float %1968, 0x4600000000000000 > %1971 = select i1 %1969, float %.op512, float 0xC600000000000000 > %1972 = fdiv float 1.000000e+00, %1961 > %1973 = fmul float %1949, %1972 > %1974 = fmul float %1955, %1972 > %1975 = select i1 %1962, float %1970, float %1973 > %1976 = select i1 %1963, float %1971, float %1974 > %1977 = call float @llvm.fma.f32(float %1975, float 5.000000e-01, float 5.000000e-01) > %1978 = call float @llvm.fma.f32(float %1976, float -5.000000e-01, float 5.000000e-01) > %1979 = bitcast float %1977 to i32 > %1980 = bitcast float %1978 
to i32 > %1981 = insertelement <2 x i32> undef, i32 %1979, i32 0 > %1982 = insertelement <2 x i32> %1981, i32 %1980, i32 1 > %1983 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1982, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %1984 = extractelement <4 x float> %1983, i32 0 > %1985 = fsub float %1943, %1984 > %1986 = fcmp olt float %1985, 0.000000e+00 > %.432 = select i1 %1986, float %1812, float %1811 > %1987 = call float @llvm.fabs.f32(float %1985) > %1988 = fsub float %1987, %.432 > %1989 = fcmp une float %.432, 0.000000e+00 > br i1 %1989, label %IF365, label %ELSE366 > >IF365: ; preds = %ENDIF358 > %1990 = fdiv float 1.000000e+00, %.432 > %1991 = fmul float %1988, %1990 > br label %ENDIF364 > >ELSE366: ; preds = %ENDIF358 > %1992 = fcmp ogt float %1988, 0.000000e+00 > %1993 = select i1 %1992, float 1.000000e+00, float %1988 > %1994 = fcmp oge float %1993, 0.000000e+00 > %.op513 = fmul float %1993, 0x4600000000000000 > %1995 = select i1 %1994, float %.op513, float 0xC600000000000000 > br label %ENDIF364 > >ENDIF364: ; preds = %ELSE366, %IF365 > %temp48.7 = phi float [ %1991, %IF365 ], [ %1995, %ELSE366 ] > %1996 = call float @llvm.AMDGPU.clamp.(float %temp48.7, float 0.000000e+00, float 1.000000e+00) > %1997 = fadd float %1996, %1934 > %1998 = call float @llvm.fma.f32(float %1395, float %893, float %137) > %1999 = call float @llvm.fma.f32(float %1397, float %893, float %138) > %2000 = call float @llvm.fma.f32(float %1399, float %893, float %139) > %2001 = call float @llvm.fma.f32(float %179, float %890, float %1998) > %2002 = call float @llvm.fma.f32(float %180, float %890, float %1999) > %2003 = call float @llvm.fma.f32(float %181, float %890, float %2000) > %2004 = call float @llvm.fma.f32(float %2001, float %1813, float %237) > %2005 = call float @llvm.fma.f32(float %2002, float %1813, float %238) > %2006 = call float @llvm.fma.f32(float %2003, float %1813, float %226) > %2007 = fmul float %25, %2004 > %2008 = fmul float %26, %2005 > %2009 = fadd float %2007, %2008 > %2010 = fmul float %27, %2006 > %2011 = fadd float %2009, %2010 > %2012 = fadd float %2011, %28 > %2013 = fmul float %29, %2004 > %2014 = fmul float %30, %2005 > %2015 = fadd float %2013, %2014 > %2016 = fmul float %31, %2006 > %2017 = fadd float %2015, %2016 > %2018 = fadd float %2017, %32 > %2019 = fmul float %33, %2004 > %2020 = fmul float %34, %2005 > %2021 = fadd float %2019, %2020 > %2022 = fmul float %35, %2006 > %2023 = fadd float %2021, %2022 > %2024 = fadd float %2023, %36 > %2025 = fcmp oeq float %2024, 0.000000e+00 > %2026 = fcmp oeq float %2024, 0.000000e+00 > %2027 = fcmp ogt float %2012, 0.000000e+00 > %2028 = select i1 %2027, float 1.000000e+00, float %2012 > %2029 = fcmp oge float %2028, 0.000000e+00 > %2030 = fcmp ogt float %2018, 0.000000e+00 > %2031 = select i1 %2030, float 1.000000e+00, float %2018 > %2032 = fcmp oge float %2031, 0.000000e+00 > %.op514 = fmul float %2028, 0x4600000000000000 > %2033 = select i1 %2029, float %.op514, float 0xC600000000000000 > %.op515 = fmul float %2031, 0x4600000000000000 > %2034 = select i1 %2032, float %.op515, float 0xC600000000000000 > %2035 = fdiv float 1.000000e+00, %2024 > %2036 = fmul float %2012, %2035 > %2037 = fmul float %2018, %2035 > %2038 = select i1 %2025, float %2033, float %2036 > %2039 = select i1 %2026, float %2034, float %2037 > %2040 = call float @llvm.fma.f32(float %2038, float 5.000000e-01, float 5.000000e-01) > %2041 = call float @llvm.fma.f32(float %2039, float -5.000000e-01, float 
5.000000e-01) > %2042 = bitcast float %2040 to i32 > %2043 = bitcast float %2041 to i32 > %2044 = insertelement <2 x i32> undef, i32 %2042, i32 0 > %2045 = insertelement <2 x i32> %2044, i32 %2043, i32 1 > %2046 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2045, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2047 = extractelement <4 x float> %2046, i32 0 > %2048 = fsub float %2006, %2047 > %2049 = fcmp olt float %2048, 0.000000e+00 > %.433 = select i1 %2049, float %1814, float %1813 > %2050 = call float @llvm.fabs.f32(float %2048) > %2051 = fsub float %2050, %.433 > %2052 = fcmp une float %.433, 0.000000e+00 > br i1 %2052, label %IF371, label %ELSE372 > >IF371: ; preds = %ENDIF364 > %2053 = fdiv float 1.000000e+00, %.433 > %2054 = fmul float %2051, %2053 > br label %ENDIF370 > >ELSE372: ; preds = %ENDIF364 > %2055 = fcmp ogt float %2051, 0.000000e+00 > %2056 = select i1 %2055, float 1.000000e+00, float %2051 > %2057 = fcmp oge float %2056, 0.000000e+00 > %.op516 = fmul float %2056, 0x4600000000000000 > %2058 = select i1 %2057, float %.op516, float 0xC600000000000000 > br label %ENDIF370 > >ENDIF370: ; preds = %ELSE372, %IF371 > %temp48.9 = phi float [ %2054, %IF371 ], [ %2058, %ELSE372 ] > %2059 = call float @llvm.AMDGPU.clamp.(float %temp48.9, float 0.000000e+00, float 1.000000e+00) > %2060 = fadd float %2059, %1997 > %2061 = call float @llvm.fma.f32(float %1395, float %965, float %137) > %2062 = call float @llvm.fma.f32(float %1397, float %965, float %138) > %2063 = call float @llvm.fma.f32(float %1399, float %965, float %139) > %2064 = call float @llvm.fma.f32(float %179, float %962, float %2061) > %2065 = call float @llvm.fma.f32(float %180, float %962, float %2062) > %2066 = call float @llvm.fma.f32(float %181, float %962, float %2063) > %2067 = call float @llvm.fma.f32(float %2064, float %1406, float %237) > %2068 = call float @llvm.fma.f32(float %2065, float %1406, float %238) > %2069 = call float @llvm.fma.f32(float %2066, float %1406, float %226) > %2070 = fmul float %25, %2067 > %2071 = fmul float %26, %2068 > %2072 = fadd float %2070, %2071 > %2073 = fmul float %27, %2069 > %2074 = fadd float %2072, %2073 > %2075 = fadd float %2074, %28 > %2076 = fmul float %29, %2067 > %2077 = fmul float %30, %2068 > %2078 = fadd float %2076, %2077 > %2079 = fmul float %31, %2069 > %2080 = fadd float %2078, %2079 > %2081 = fadd float %2080, %32 > %2082 = fmul float %33, %2067 > %2083 = fmul float %34, %2068 > %2084 = fadd float %2082, %2083 > %2085 = fmul float %35, %2069 > %2086 = fadd float %2084, %2085 > %2087 = fadd float %2086, %36 > %2088 = fcmp oeq float %2087, 0.000000e+00 > %2089 = fcmp oeq float %2087, 0.000000e+00 > %2090 = fcmp ogt float %2075, 0.000000e+00 > %2091 = select i1 %2090, float 1.000000e+00, float %2075 > %2092 = fcmp oge float %2091, 0.000000e+00 > %2093 = fcmp ogt float %2081, 0.000000e+00 > %2094 = select i1 %2093, float 1.000000e+00, float %2081 > %2095 = fcmp oge float %2094, 0.000000e+00 > %.op517 = fmul float %2091, 0x4600000000000000 > %2096 = select i1 %2092, float %.op517, float 0xC600000000000000 > %.op518 = fmul float %2094, 0x4600000000000000 > %2097 = select i1 %2095, float %.op518, float 0xC600000000000000 > %2098 = fdiv float 1.000000e+00, %2087 > %2099 = fmul float %2075, %2098 > %2100 = fmul float %2081, %2098 > %2101 = select i1 %2088, float %2096, float %2099 > %2102 = select i1 %2089, float %2097, float %2100 > %2103 = call float @llvm.fma.f32(float %2101, float 5.000000e-01, float 5.000000e-01) 
> %2104 = call float @llvm.fma.f32(float %2102, float -5.000000e-01, float 5.000000e-01) > %2105 = bitcast float %2103 to i32 > %2106 = bitcast float %2104 to i32 > %2107 = insertelement <2 x i32> undef, i32 %2105, i32 0 > %2108 = insertelement <2 x i32> %2107, i32 %2106, i32 1 > %2109 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2108, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2110 = extractelement <4 x float> %2109, i32 0 > %2111 = fsub float %2069, %2110 > %2112 = fcmp olt float %2111, 0.000000e+00 > %.434 = select i1 %2112, float %1407, float %1406 > %2113 = call float @llvm.fabs.f32(float %2111) > %2114 = fsub float %2113, %.434 > %2115 = fcmp une float %.434, 0.000000e+00 > br i1 %2115, label %IF377, label %ELSE378 > >IF377: ; preds = %ENDIF370 > %2116 = fdiv float 1.000000e+00, %.434 > %2117 = fmul float %2114, %2116 > br label %ENDIF376 > >ELSE378: ; preds = %ENDIF370 > %2118 = fcmp ogt float %2114, 0.000000e+00 > %2119 = select i1 %2118, float 1.000000e+00, float %2114 > %2120 = fcmp oge float %2119, 0.000000e+00 > %.op519 = fmul float %2119, 0x4600000000000000 > %2121 = select i1 %2120, float %.op519, float 0xC600000000000000 > br label %ENDIF376 > >ENDIF376: ; preds = %ELSE378, %IF377 > %temp48.11 = phi float [ %2117, %IF377 ], [ %2121, %ELSE378 ] > %2122 = call float @llvm.AMDGPU.clamp.(float %temp48.11, float 0.000000e+00, float 1.000000e+00) > %2123 = fadd float %2122, %2060 > %2124 = fmul float %25, %1475 > %2125 = fmul float %26, %1476 > %2126 = fadd float %2124, %2125 > %2127 = fmul float %27, %1477 > %2128 = fadd float %2126, %2127 > %2129 = fadd float %2128, %28 > %2130 = fmul float %29, %1475 > %2131 = fmul float %30, %1476 > %2132 = fadd float %2130, %2131 > %2133 = fmul float %31, %1477 > %2134 = fadd float %2132, %2133 > %2135 = fadd float %2134, %32 > %2136 = fmul float %33, %1475 > %2137 = fmul float %34, %1476 > %2138 = fadd float %2136, %2137 > %2139 = fmul float %35, %1477 > %2140 = fadd float %2138, %2139 > %2141 = fadd float %2140, %36 > %2142 = fcmp oeq float %2141, 0.000000e+00 > %2143 = fcmp oeq float %2141, 0.000000e+00 > %2144 = fcmp ogt float %2129, 0.000000e+00 > %2145 = select i1 %2144, float 1.000000e+00, float %2129 > %2146 = fcmp oge float %2145, 0.000000e+00 > %2147 = fcmp ogt float %2135, 0.000000e+00 > %2148 = select i1 %2147, float 1.000000e+00, float %2135 > %2149 = fcmp oge float %2148, 0.000000e+00 > %.op520 = fmul float %2145, 0x4600000000000000 > %2150 = select i1 %2146, float %.op520, float 0xC600000000000000 > %.op521 = fmul float %2148, 0x4600000000000000 > %2151 = select i1 %2149, float %.op521, float 0xC600000000000000 > %2152 = fdiv float 1.000000e+00, %2141 > %2153 = fmul float %2129, %2152 > %2154 = fmul float %2135, %2152 > %2155 = select i1 %2142, float %2150, float %2153 > %2156 = select i1 %2143, float %2151, float %2154 > %2157 = call float @llvm.fma.f32(float %2155, float 5.000000e-01, float 5.000000e-01) > %2158 = call float @llvm.fma.f32(float %2156, float -5.000000e-01, float 5.000000e-01) > %2159 = bitcast float %2157 to i32 > %2160 = bitcast float %2158 to i32 > %2161 = insertelement <2 x i32> undef, i32 %2159, i32 0 > %2162 = insertelement <2 x i32> %2161, i32 %2160, i32 1 > %2163 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2162, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2164 = extractelement <4 x float> %2163, i32 0 > %2165 = fsub float %1477, %2164 > %2166 = fcmp olt float %2165, 0.000000e+00 > 
%.435 = select i1 %2166, float %1409, float %1408 > %2167 = call float @llvm.fabs.f32(float %2165) > %2168 = fsub float %2167, %.435 > %2169 = fcmp une float %.435, 0.000000e+00 > br i1 %2169, label %IF383, label %ELSE384 > >IF383: ; preds = %ENDIF376 > %2170 = fdiv float 1.000000e+00, %.435 > %2171 = fmul float %2168, %2170 > br label %ENDIF382 > >ELSE384: ; preds = %ENDIF376 > %2172 = fcmp ogt float %2168, 0.000000e+00 > %2173 = select i1 %2172, float 1.000000e+00, float %2168 > %2174 = fcmp oge float %2173, 0.000000e+00 > %.op522 = fmul float %2173, 0x4600000000000000 > %2175 = select i1 %2174, float %.op522, float 0xC600000000000000 > br label %ENDIF382 > >ENDIF382: ; preds = %ELSE384, %IF383 > %temp48.13 = phi float [ %2171, %IF383 ], [ %2175, %ELSE384 ] > %2176 = call float @llvm.AMDGPU.clamp.(float %temp48.13, float 0.000000e+00, float 1.000000e+00) > %2177 = fadd float %2176, %2123 > %2178 = call float @llvm.fma.f32(float %1395, float %1103, float %137) > %2179 = call float @llvm.fma.f32(float %1397, float %1103, float %138) > %2180 = call float @llvm.fma.f32(float %1399, float %1103, float %139) > %2181 = call float @llvm.fma.f32(float %179, float %1100, float %2178) > %2182 = call float @llvm.fma.f32(float %180, float %1100, float %2179) > %2183 = call float @llvm.fma.f32(float %181, float %1100, float %2180) > %2184 = call float @llvm.fma.f32(float %2181, float %1547, float %237) > %2185 = call float @llvm.fma.f32(float %2182, float %1547, float %238) > %2186 = call float @llvm.fma.f32(float %2183, float %1547, float %226) > %2187 = fmul float %25, %2184 > %2188 = fmul float %26, %2185 > %2189 = fadd float %2187, %2188 > %2190 = fmul float %27, %2186 > %2191 = fadd float %2189, %2190 > %2192 = fadd float %2191, %28 > %2193 = fmul float %29, %2184 > %2194 = fmul float %30, %2185 > %2195 = fadd float %2193, %2194 > %2196 = fmul float %31, %2186 > %2197 = fadd float %2195, %2196 > %2198 = fadd float %2197, %32 > %2199 = fmul float %33, %2184 > %2200 = fmul float %34, %2185 > %2201 = fadd float %2199, %2200 > %2202 = fmul float %35, %2186 > %2203 = fadd float %2201, %2202 > %2204 = fadd float %2203, %36 > %2205 = fcmp oeq float %2204, 0.000000e+00 > %2206 = fcmp oeq float %2204, 0.000000e+00 > %2207 = fcmp ogt float %2192, 0.000000e+00 > %2208 = select i1 %2207, float 1.000000e+00, float %2192 > %2209 = fcmp oge float %2208, 0.000000e+00 > %2210 = fcmp ogt float %2198, 0.000000e+00 > %2211 = select i1 %2210, float 1.000000e+00, float %2198 > %2212 = fcmp oge float %2211, 0.000000e+00 > %.op523 = fmul float %2208, 0x4600000000000000 > %2213 = select i1 %2209, float %.op523, float 0xC600000000000000 > %.op524 = fmul float %2211, 0x4600000000000000 > %2214 = select i1 %2212, float %.op524, float 0xC600000000000000 > %2215 = fdiv float 1.000000e+00, %2204 > %2216 = fmul float %2192, %2215 > %2217 = fmul float %2198, %2215 > %2218 = select i1 %2205, float %2213, float %2216 > %2219 = select i1 %2206, float %2214, float %2217 > %2220 = call float @llvm.fma.f32(float %2218, float 5.000000e-01, float 5.000000e-01) > %2221 = call float @llvm.fma.f32(float %2219, float -5.000000e-01, float 5.000000e-01) > %2222 = bitcast float %2220 to i32 > %2223 = bitcast float %2221 to i32 > %2224 = insertelement <2 x i32> undef, i32 %2222, i32 0 > %2225 = insertelement <2 x i32> %2224, i32 %2223, i32 1 > %2226 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2225, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2227 = extractelement <4 x float> %2226, i32 
0 > %2228 = fsub float %2186, %2227 > %2229 = fcmp olt float %2228, 0.000000e+00 > %.436 = select i1 %2229, float %1548, float %1547 > %2230 = call float @llvm.fabs.f32(float %2228) > %2231 = fsub float %2230, %.436 > %2232 = fcmp une float %.436, 0.000000e+00 > br i1 %2232, label %IF389, label %ELSE390 > >IF389: ; preds = %ENDIF382 > %2233 = fdiv float 1.000000e+00, %.436 > %2234 = fmul float %2231, %2233 > br label %ENDIF388 > >ELSE390: ; preds = %ENDIF382 > %2235 = fcmp ogt float %2231, 0.000000e+00 > %2236 = select i1 %2235, float 1.000000e+00, float %2231 > %2237 = fcmp oge float %2236, 0.000000e+00 > %.op525 = fmul float %2236, 0x4600000000000000 > %2238 = select i1 %2237, float %.op525, float 0xC600000000000000 > br label %ENDIF388 > >ENDIF388: ; preds = %ELSE390, %IF389 > %temp48.15 = phi float [ %2234, %IF389 ], [ %2238, %ELSE390 ] > %2239 = call float @llvm.AMDGPU.clamp.(float %temp48.15, float 0.000000e+00, float 1.000000e+00) > %2240 = fadd float %2239, %2177 > %2241 = call float @llvm.fma.f32(float %1395, float %1175, float %137) > %2242 = call float @llvm.fma.f32(float %1397, float %1175, float %138) > %2243 = call float @llvm.fma.f32(float %1399, float %1175, float %139) > %2244 = call float @llvm.fma.f32(float %179, float %1172, float %2241) > %2245 = call float @llvm.fma.f32(float %180, float %1172, float %2242) > %2246 = call float @llvm.fma.f32(float %181, float %1172, float %2243) > %2247 = call float @llvm.fma.f32(float %2244, float %1549, float %237) > %2248 = call float @llvm.fma.f32(float %2245, float %1549, float %238) > %2249 = call float @llvm.fma.f32(float %2246, float %1549, float %226) > %2250 = fmul float %25, %2247 > %2251 = fmul float %26, %2248 > %2252 = fadd float %2250, %2251 > %2253 = fmul float %27, %2249 > %2254 = fadd float %2252, %2253 > %2255 = fadd float %2254, %28 > %2256 = fmul float %29, %2247 > %2257 = fmul float %30, %2248 > %2258 = fadd float %2256, %2257 > %2259 = fmul float %31, %2249 > %2260 = fadd float %2258, %2259 > %2261 = fadd float %2260, %32 > %2262 = fmul float %33, %2247 > %2263 = fmul float %34, %2248 > %2264 = fadd float %2262, %2263 > %2265 = fmul float %35, %2249 > %2266 = fadd float %2264, %2265 > %2267 = fadd float %2266, %36 > %2268 = fcmp oeq float %2267, 0.000000e+00 > %2269 = fcmp oeq float %2267, 0.000000e+00 > %2270 = fcmp ogt float %2255, 0.000000e+00 > %2271 = select i1 %2270, float 1.000000e+00, float %2255 > %2272 = fcmp oge float %2271, 0.000000e+00 > %2273 = fcmp ogt float %2261, 0.000000e+00 > %2274 = select i1 %2273, float 1.000000e+00, float %2261 > %2275 = fcmp oge float %2274, 0.000000e+00 > %.op526 = fmul float %2271, 0x4600000000000000 > %2276 = select i1 %2272, float %.op526, float 0xC600000000000000 > %.op527 = fmul float %2274, 0x4600000000000000 > %2277 = select i1 %2275, float %.op527, float 0xC600000000000000 > %2278 = fdiv float 1.000000e+00, %2267 > %2279 = fmul float %2255, %2278 > %2280 = fmul float %2261, %2278 > %2281 = select i1 %2268, float %2276, float %2279 > %2282 = select i1 %2269, float %2277, float %2280 > %2283 = call float @llvm.fma.f32(float %2281, float 5.000000e-01, float 5.000000e-01) > %2284 = call float @llvm.fma.f32(float %2282, float -5.000000e-01, float 5.000000e-01) > %2285 = bitcast float %2283 to i32 > %2286 = bitcast float %2284 to i32 > %2287 = insertelement <2 x i32> undef, i32 %2285, i32 0 > %2288 = insertelement <2 x i32> %2287, i32 %2286, i32 1 > %2289 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2288, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0) > %2290 = extractelement <4 x float> %2289, i32 0 > %2291 = fsub float %2249, %2290 > %2292 = fcmp olt float %2291, 0.000000e+00 > %.437 = select i1 %2292, float %1550, float %1549 > %2293 = call float @llvm.fabs.f32(float %2291) > %2294 = fsub float %2293, %.437 > %2295 = fcmp une float %.437, 0.000000e+00 > br i1 %2295, label %IF395, label %ELSE396 > >IF395: ; preds = %ENDIF388 > %2296 = fdiv float 1.000000e+00, %.437 > %2297 = fmul float %2294, %2296 > br label %ENDIF394 > >ELSE396: ; preds = %ENDIF388 > %2298 = fcmp ogt float %2294, 0.000000e+00 > %2299 = select i1 %2298, float 1.000000e+00, float %2294 > %2300 = fcmp oge float %2299, 0.000000e+00 > %.op528 = fmul float %2299, 0x4600000000000000 > %2301 = select i1 %2300, float %.op528, float 0xC600000000000000 > br label %ENDIF394 > >ENDIF394: ; preds = %ELSE396, %IF395 > %temp24.1 = phi float [ %2297, %IF395 ], [ %2301, %ELSE396 ] > %2302 = call float @llvm.AMDGPU.clamp.(float %temp24.1, float 0.000000e+00, float 1.000000e+00) > %2303 = fadd float %2302, %2240 > %2304 = call float @llvm.fma.f32(float %1395, float %1250, float %137) > %2305 = call float @llvm.fma.f32(float %1397, float %1250, float %138) > %2306 = call float @llvm.fma.f32(float %1399, float %1250, float %139) > %2307 = call float @llvm.fma.f32(float %1395, float %1319, float %137) > %2308 = call float @llvm.fma.f32(float %1397, float %1319, float %138) > %2309 = call float @llvm.fma.f32(float %1399, float %1319, float %139) > %2310 = call float @llvm.fma.f32(float %179, float %1247, float %2307) > %2311 = call float @llvm.fma.f32(float %180, float %1247, float %2308) > %2312 = call float @llvm.fma.f32(float %181, float %1247, float %2309) > %2313 = call float @llvm.fma.f32(float %179, float %1244, float %2304) > %2314 = call float @llvm.fma.f32(float %180, float %1244, float %2305) > %2315 = call float @llvm.fma.f32(float %181, float %1244, float %2306) > %2316 = call float @llvm.fma.f32(float %2313, float %1677, float %237) > %2317 = call float @llvm.fma.f32(float %2314, float %1677, float %238) > %2318 = call float @llvm.fma.f32(float %2315, float %1677, float %226) > %2319 = call float @llvm.fma.f32(float %2310, float %1679, float %237) > %2320 = call float @llvm.fma.f32(float %2311, float %1679, float %238) > %2321 = call float @llvm.fma.f32(float %2312, float %1679, float %226) > %2322 = fmul float %25, %2316 > %2323 = fmul float %26, %2317 > %2324 = fadd float %2322, %2323 > %2325 = fmul float %27, %2318 > %2326 = fadd float %2324, %2325 > %2327 = fadd float %2326, %28 > %2328 = fmul float %29, %2316 > %2329 = fmul float %30, %2317 > %2330 = fadd float %2328, %2329 > %2331 = fmul float %31, %2318 > %2332 = fadd float %2330, %2331 > %2333 = fadd float %2332, %32 > %2334 = fmul float %33, %2316 > %2335 = fmul float %34, %2317 > %2336 = fadd float %2334, %2335 > %2337 = fmul float %35, %2318 > %2338 = fadd float %2336, %2337 > %2339 = fadd float %2338, %36 > %2340 = fcmp oeq float %2339, 0.000000e+00 > %2341 = fcmp oeq float %2339, 0.000000e+00 > %2342 = fcmp ogt float %2327, 0.000000e+00 > %2343 = select i1 %2342, float 1.000000e+00, float %2327 > %2344 = fcmp oge float %2343, 0.000000e+00 > %2345 = fcmp ogt float %2333, 0.000000e+00 > %2346 = select i1 %2345, float 1.000000e+00, float %2333 > %2347 = fcmp oge float %2346, 0.000000e+00 > %.op529 = fmul float %2343, 0x4600000000000000 > %2348 = select i1 %2344, float %.op529, float 0xC600000000000000 > %.op530 = fmul float %2346, 0x4600000000000000 > %2349 = select i1 %2347, float 
%.op530, float 0xC600000000000000 > %2350 = fdiv float 1.000000e+00, %2339 > %2351 = fmul float %2327, %2350 > %2352 = fmul float %2333, %2350 > %2353 = select i1 %2340, float %2348, float %2351 > %2354 = select i1 %2341, float %2349, float %2352 > %2355 = call float @llvm.fma.f32(float %2353, float 5.000000e-01, float 5.000000e-01) > %2356 = call float @llvm.fma.f32(float %2354, float -5.000000e-01, float 5.000000e-01) > %2357 = bitcast float %2355 to i32 > %2358 = bitcast float %2356 to i32 > %2359 = insertelement <2 x i32> undef, i32 %2357, i32 0 > %2360 = insertelement <2 x i32> %2359, i32 %2358, i32 1 > %2361 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2360, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2362 = extractelement <4 x float> %2361, i32 0 > %2363 = fsub float %2318, %2362 > %2364 = fcmp olt float %2363, 0.000000e+00 > %.438 = select i1 %2364, float %1678, float %1677 > %2365 = call float @llvm.fabs.f32(float %2363) > %2366 = fsub float %2365, %.438 > %2367 = fcmp une float %.438, 0.000000e+00 > br i1 %2367, label %IF401, label %ELSE402 > >IF401: ; preds = %ENDIF394 > %2368 = fdiv float 1.000000e+00, %.438 > %2369 = fmul float %2366, %2368 > br label %ENDIF400 > >ELSE402: ; preds = %ENDIF394 > %2370 = fcmp ogt float %2366, 0.000000e+00 > %2371 = select i1 %2370, float 1.000000e+00, float %2366 > %2372 = fcmp oge float %2371, 0.000000e+00 > %.op531 = fmul float %2371, 0x4600000000000000 > %2373 = select i1 %2372, float %.op531, float 0xC600000000000000 > br label %ENDIF400 > >ENDIF400: ; preds = %ELSE402, %IF401 > %temp16.1 = phi float [ %2369, %IF401 ], [ %2373, %ELSE402 ] > %2374 = call float @llvm.AMDGPU.clamp.(float %temp16.1, float 0.000000e+00, float 1.000000e+00) > %2375 = fadd float %2374, %2303 > %2376 = fmul float %25, %2319 > %2377 = fmul float %26, %2320 > %2378 = fadd float %2376, %2377 > %2379 = fmul float %27, %2321 > %2380 = fadd float %2378, %2379 > %2381 = fadd float %2380, %28 > %2382 = fmul float %29, %2319 > %2383 = fmul float %30, %2320 > %2384 = fadd float %2382, %2383 > %2385 = fmul float %31, %2321 > %2386 = fadd float %2384, %2385 > %2387 = fadd float %2386, %32 > %2388 = fmul float %33, %2319 > %2389 = fmul float %34, %2320 > %2390 = fadd float %2388, %2389 > %2391 = fmul float %35, %2321 > %2392 = fadd float %2390, %2391 > %2393 = fadd float %2392, %36 > %2394 = fcmp oeq float %2393, 0.000000e+00 > %2395 = fcmp oeq float %2393, 0.000000e+00 > %2396 = fcmp ogt float %2381, 0.000000e+00 > %2397 = select i1 %2396, float 1.000000e+00, float %2381 > %2398 = fcmp oge float %2397, 0.000000e+00 > %2399 = fcmp ogt float %2387, 0.000000e+00 > %2400 = select i1 %2399, float 1.000000e+00, float %2387 > %2401 = fcmp oge float %2400, 0.000000e+00 > %.op532 = fmul float %2397, 0x4600000000000000 > %2402 = select i1 %2398, float %.op532, float 0xC600000000000000 > %.op533 = fmul float %2400, 0x4600000000000000 > %2403 = select i1 %2401, float %.op533, float 0xC600000000000000 > %2404 = fdiv float 1.000000e+00, %2393 > %2405 = fmul float %2381, %2404 > %2406 = fmul float %2387, %2404 > %2407 = select i1 %2394, float %2402, float %2405 > %2408 = select i1 %2395, float %2403, float %2406 > %2409 = call float @llvm.fma.f32(float %2407, float 5.000000e-01, float 5.000000e-01) > %2410 = call float @llvm.fma.f32(float %2408, float -5.000000e-01, float 5.000000e-01) > %2411 = bitcast float %2409 to i32 > %2412 = bitcast float %2410 to i32 > %2413 = insertelement <2 x i32> undef, i32 %2411, i32 0 > %2414 = 
insertelement <2 x i32> %2413, i32 %2412, i32 1 > %2415 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %2414, <8 x i32> %76, <4 x i32> %83, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %2416 = extractelement <4 x float> %2415, i32 0 > %2417 = fsub float %2321, %2416 > %2418 = fcmp olt float %2417, 0.000000e+00 > %.439 = select i1 %2418, float %1680, float %1679 > %2419 = call float @llvm.fabs.f32(float %2417) > %2420 = fsub float %2419, %.439 > %2421 = fcmp une float %.439, 0.000000e+00 > br i1 %2421, label %IF407, label %ELSE408 > >IF407: ; preds = %ENDIF400 > %2422 = fdiv float 1.000000e+00, %.439 > %2423 = fmul float %2420, %2422 > br label %ENDIF406 > >ELSE408: ; preds = %ENDIF400 > %2424 = fcmp ogt float %2420, 0.000000e+00 > %2425 = select i1 %2424, float 1.000000e+00, float %2420 > %2426 = fcmp oge float %2425, 0.000000e+00 > %.op534 = fmul float %2425, 0x4600000000000000 > %2427 = select i1 %2426, float %.op534, float 0xC600000000000000 > br label %ENDIF406 > >ENDIF406: ; preds = %ELSE408, %IF407 > %temp8.1 = phi float [ %2423, %IF407 ], [ %2427, %ELSE408 ] > %2428 = call float @llvm.AMDGPU.clamp.(float %temp8.1, float 0.000000e+00, float 1.000000e+00) > %2429 = fadd float %2428, %2375 > %2430 = fmul float %2429, 6.250000e-02 > %2431 = fmul float %2430, %2430 > %2432 = call float @llvm.minnum.f32(float %2431, float %1387) > %2433 = bitcast float %5 to i32 > %2434 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %2433, 10 > %2435 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2434, float %2432, 11 > %2436 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2435, float %2431, 12 > %2437 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2436, float 0.000000e+00, 13 > %2438 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2437, float 1.000000e+00, 14 > %2439 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2438, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %2439 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare 
float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], BUFFER, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..27] >DCL CONST[2][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] INT32 {0, 0, 0, 0} >IMM[1] UINT32 {0, 432, 1, 352} >IMM[2] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.0040, 6.2000} >IMM[3] FLT32 { 0.5000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].x, IMM[0].xxxx > 1: MOV TEMP[0].w, IMM[1].xxxx > 2: TXF TEMP[0].x, TEMP[0], SAMP[0], BUFFER > 3: FSEQ TEMP[1].xy, IN[0].wwww, IMM[2].xxxx > 4: SSG TEMP[2].xy, IN[0].xyyy > 5: MUL TEMP[2].xy, IMM[2].yyyy, TEMP[2].xyyy > 6: RCP TEMP[3].xy, IN[0].wwww > 7: MUL TEMP[3].xy, IN[0].xyyy, TEMP[3].xyyy > 8: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 9: MOV TEMP[1].xy, TEMP[1].xyyy > 10: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D > 11: FMA TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xxxx, IMM[2].zzzz > 12: DP3 TEMP[1].x, TEMP[1].xyzz, CONST[1][27].xyzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, CONST[2][22].xxxx > 14: MOV TEMP[1].w, TEMP[1].xxxx > 15: MAX TEMP[2].xyz, TEMP[0].xyzz, IMM[2].xxxx > 16: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[2].wwww, IMM[3].xxxx > 17: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[3].xyzz > 18: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[2].wwww, IMM[3].yyyy > 19: FMA TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, IMM[3].zzzz > 20: FSEQ TEMP[3].xyz, TEMP[2].xyzz, IMM[2].xxxx > 21: SSG TEMP[4].xyz, TEMP[0].xyzz > 22: MUL TEMP[4].xyz, IMM[2].yyyy, TEMP[4].xyzz > 23: RCP TEMP[5].x, TEMP[2].xxxx > 24: RCP TEMP[5].y, TEMP[2].yyyy > 25: RCP TEMP[5].z, TEMP[2].zzzz > 26: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz > 27: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz > 28: MOV OUT[0], TEMP[1] > 29: END >radeonsi: Compiling shader 90 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 > %30 = call float @llvm.SI.load.const(<16 x i8> 
%29, i32 352) > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to <2 x i128> addrspace(2)* > %32 = load <2 x i128>, <2 x i128> addrspace(2)* %31, align 32, !tbaa !0 > %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !tbaa !0 > %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 7 > %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 > %38 = extractelement <8 x i32> %34, i32 7 > %39 = extractelement <4 x i32> %37, i32 0 > %40 = and i32 %39, %38 > %41 = insertelement <4 x i32> %37, i32 %40, i32 0 > %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %45 = extractelement <2 x i128> %32, i32 1 > %46 = bitcast i128 %45 to <16 x i8> > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 0) > %48 = extractelement <4 x float> %47, i32 0 > %49 = fcmp oeq float %44, 0.000000e+00 > %50 = fcmp oeq float %44, 0.000000e+00 > %51 = fcmp ogt float %42, 0.000000e+00 > %52 = select i1 %51, float 1.000000e+00, float %42 > %53 = fcmp oge float %52, 0.000000e+00 > %54 = fcmp ogt float %43, 0.000000e+00 > %55 = select i1 %54, float 1.000000e+00, float %43 > %56 = fcmp oge float %55, 0.000000e+00 > %.op = fmul float %52, 0x4600000000000000 > %57 = select i1 %53, float %.op, float 0xC600000000000000 > %.op24 = fmul float %55, 0x4600000000000000 > %58 = select i1 %56, float %.op24, float 0xC600000000000000 > %59 = fdiv float 1.000000e+00, %44 > %60 = fmul float %42, %59 > %61 = fmul float %43, %59 > %62 = select i1 %49, float %57, float %60 > %63 = select i1 %50, float %58, float %61 > %64 = bitcast float %62 to i32 > %65 = bitcast float %63 to i32 > %66 = insertelement <2 x i32> undef, i32 %64, i32 0 > %67 = insertelement <2 x i32> %66, i32 %65, i32 1 > %68 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %67, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = extractelement <4 x float> %68, i32 2 > %72 = call float @llvm.fma.f32(float %69, float %48, float 0xBF70624DE0000000) > %73 = call float @llvm.fma.f32(float %70, float %48, float 0xBF70624DE0000000) > %74 = call float @llvm.fma.f32(float %71, float %48, float 0xBF70624DE0000000) > %75 = fmul float %69, %25 > %76 = fmul float %70, %26 > %77 = fadd float %76, %75 > %78 = fmul float %71, %27 > %79 = fadd float %77, %78 > %80 = fmul float %79, %30 > %81 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00) > %82 = call float @llvm.maxnum.f32(float %73, float 0.000000e+00) > %83 = call float @llvm.maxnum.f32(float %74, float 0.000000e+00) > %84 = call float @llvm.fma.f32(float %81, float 0x4018CCCCC0000000, float 5.000000e-01) > %85 = call float @llvm.fma.f32(float %82, float 0x4018CCCCC0000000, float 5.000000e-01) > %86 = call float @llvm.fma.f32(float %83, float 0x4018CCCCC0000000, float 5.000000e-01) > %87 = fmul float %81, %84 > %88 = fmul float %82, %85 > %89 = fmul float %83, %86 > %90 = call float @llvm.fma.f32(float %81, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %91 = call float @llvm.fma.f32(float %82, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %92 = call float 
@llvm.fma.f32(float %83, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %93 = call float @llvm.fma.f32(float %81, float %90, float 0x3FAEB851E0000000) > %94 = call float @llvm.fma.f32(float %82, float %91, float 0x3FAEB851E0000000) > %95 = call float @llvm.fma.f32(float %83, float %92, float 0x3FAEB851E0000000) > %96 = fcmp oeq float %93, 0.000000e+00 > %97 = fcmp oeq float %94, 0.000000e+00 > %98 = fcmp oeq float %95, 0.000000e+00 > %99 = fcmp ogt float %87, 0.000000e+00 > %100 = select i1 %99, float 1.000000e+00, float %87 > %101 = fcmp oge float %100, 0.000000e+00 > %102 = fcmp ogt float %88, 0.000000e+00 > %103 = select i1 %102, float 1.000000e+00, float %88 > %104 = fcmp oge float %103, 0.000000e+00 > %105 = fcmp ogt float %89, 0.000000e+00 > %106 = select i1 %105, float 1.000000e+00, float %89 > %107 = fcmp oge float %106, 0.000000e+00 > %.op25 = fmul float %100, 0x4600000000000000 > %108 = select i1 %101, float %.op25, float 0xC600000000000000 > %.op26 = fmul float %103, 0x4600000000000000 > %109 = select i1 %104, float %.op26, float 0xC600000000000000 > %.op27 = fmul float %106, 0x4600000000000000 > %110 = select i1 %107, float %.op27, float 0xC600000000000000 > %111 = fdiv float 1.000000e+00, %93 > %112 = fdiv float 1.000000e+00, %94 > %113 = fdiv float 1.000000e+00, %95 > %114 = fmul float %87, %111 > %115 = fmul float %88, %112 > %116 = fmul float %89, %113 > %117 = select i1 %96, float %108, float %114 > %118 = select i1 %97, float %109, float %115 > %119 = select i1 %98, float %110, float %116 > %120 = bitcast float %5 to i32 > %121 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %120, 10 > %122 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %121, float %117, 11 > %123 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %122, float %118, 12 > %124 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %123, float %119, 13 > %125 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %124, float %80, 14 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %125, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float 
@llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1037, 0.1718} >IMM[2] FLT32 { -2.4492, -6.3704, -0.6575, -4.4092} >IMM[3] FLT32 { 4.4092, 0.6575, 6.3704, 2.4492} >IMM[4] FLT32 { 0.1815, 0.0430, 0.0000, 0.0000} > 0: FSNE TEMP[0].x, CONST[1][5].xxxx, IMM[1].xxxx > 1: UIF TEMP[0].xxxx :0 > 2: RCP TEMP[0].x, CONST[1][5].xxxx > 3: ELSE :0 > 4: MOV TEMP[0].x, IMM[1].yyyy > 5: ENDIF > 6: FMA TEMP[1], TEMP[0].xxxx, IMM[2], IN[0].xxxx > 7: MOV TEMP[2].xz, TEMP[1].xxzx > 8: FMA TEMP[0], TEMP[0].xxxx, IMM[3], IN[0].xxxx > 9: MOV TEMP[3].xz, TEMP[0].xxzx > 10: MOV TEMP[1].xz, TEMP[1].yywy > 11: MOV TEMP[1].yw, IN[0].yyyy > 12: MOV TEMP[4].xy, TEMP[1].zwww > 13: TEX TEMP[4], TEMP[4], SAMP[0], 2D > 14: MUL TEMP[4], TEMP[4], IMM[1].zzzz > 15: MOV TEMP[2].yw, IN[0].yyyy > 16: MOV TEMP[5].xy, TEMP[2].zwww > 17: TEX TEMP[5], TEMP[5], SAMP[0], 2D > 18: MOV TEMP[2].xy, TEMP[2].xyyy > 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 20: MOV TEMP[6].xy, TEMP[1].xyyy > 21: TEX TEMP[6], TEMP[6], SAMP[0], 2D > 22: FMA TEMP[4], TEMP[6], IMM[4].yyyy, TEMP[4] > 23: FMA TEMP[2], TEMP[2], IMM[4].xxxx, TEMP[4] > 24: FMA TEMP[2], TEMP[5], IMM[1].wwww, TEMP[2] > 25: MOV TEMP[1].xz, TEMP[0].yywy > 26: MOV TEMP[1].yw, IN[0].yyyy > 27: MOV TEMP[0].xy, TEMP[1].xyyy > 28: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 29: MOV TEMP[1].xy, TEMP[1].zwww > 30: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 31: FMA TEMP[0], TEMP[0], IMM[1].wwww, TEMP[2] > 32: FMA TEMP[0], TEMP[1], IMM[4].xxxx, TEMP[0] > 33: MOV TEMP[3].yw, IN[0].yyyy > 34: MOV TEMP[1].xy, TEMP[3].xyyy > 35: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 36: MOV TEMP[2].xy, TEMP[3].zwww > 37: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 38: FMA TEMP[0], TEMP[1], IMM[1].zzzz, TEMP[0] > 39: FMA TEMP[0], TEMP[2], IMM[4].yyyy, TEMP[0] > 40: MOV OUT[0], TEMP[0] > 41: END >radeonsi: Compiling shader 91 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 > %28 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] 
addrspace(2)* > %29 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %28, i64 0, i64 3 > %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 > %31 = extractelement <8 x i32> %27, i32 7 > %32 = extractelement <4 x i32> %30, i32 0 > %33 = and i32 %32, %31 > %34 = insertelement <4 x i32> %30, i32 %33, i32 0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = fcmp une float %25, 0.000000e+00 > %38 = fdiv float 1.000000e+00, %25 > %temp.0 = select i1 %37, float %38, float 0x4600000000000000 > %39 = call float @llvm.fma.f32(float %temp.0, float 0xC00397DF20000000, float %35) > %40 = call float @llvm.fma.f32(float %temp.0, float 0xC0197B4360000000, float %35) > %41 = call float @llvm.fma.f32(float %temp.0, float 0xBFE50A87E0000000, float %35) > %42 = call float @llvm.fma.f32(float %temp.0, float 0xC011A300A0000000, float %35) > %43 = call float @llvm.fma.f32(float %temp.0, float 0x4011A300A0000000, float %35) > %44 = call float @llvm.fma.f32(float %temp.0, float 0x3FE50A87E0000000, float %35) > %45 = call float @llvm.fma.f32(float %temp.0, float 0x40197B4360000000, float %35) > %46 = call float @llvm.fma.f32(float %temp.0, float 0x400397DF20000000, float %35) > %47 = bitcast float %42 to i32 > %48 = bitcast float %36 to i32 > %49 = insertelement <2 x i32> undef, i32 %47, i32 0 > %50 = insertelement <2 x i32> %49, i32 %48, i32 1 > %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = fmul float %52, 0x3FBA8AA3E0000000 > %57 = fmul float %53, 0x3FBA8AA3E0000000 > %58 = fmul float %54, 0x3FBA8AA3E0000000 > %59 = fmul float %55, 0x3FBA8AA3E0000000 > %60 = bitcast float %41 to i32 > %61 = bitcast float %36 to i32 > %62 = insertelement <2 x i32> undef, i32 %60, i32 0 > %63 = insertelement <2 x i32> %62, i32 %61, i32 1 > %64 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %63, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = extractelement <4 x float> %64, i32 3 > %69 = bitcast float %39 to i32 > %70 = bitcast float %36 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = bitcast float %40 to i32 > %79 = bitcast float %36 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = extractelement <4 x float> %82, i32 3 > %87 = call float 
@llvm.fma.f32(float %83, float 0x3FA604E9A0000000, float %56) > %88 = call float @llvm.fma.f32(float %84, float 0x3FA604E9A0000000, float %57) > %89 = call float @llvm.fma.f32(float %85, float 0x3FA604E9A0000000, float %58) > %90 = call float @llvm.fma.f32(float %86, float 0x3FA604E9A0000000, float %59) > %91 = call float @llvm.fma.f32(float %74, float 0x3FC73AB6A0000000, float %87) > %92 = call float @llvm.fma.f32(float %75, float 0x3FC73AB6A0000000, float %88) > %93 = call float @llvm.fma.f32(float %76, float 0x3FC73AB6A0000000, float %89) > %94 = call float @llvm.fma.f32(float %77, float 0x3FC73AB6A0000000, float %90) > %95 = call float @llvm.fma.f32(float %65, float 0x3FC5FEBD20000000, float %91) > %96 = call float @llvm.fma.f32(float %66, float 0x3FC5FEBD20000000, float %92) > %97 = call float @llvm.fma.f32(float %67, float 0x3FC5FEBD20000000, float %93) > %98 = call float @llvm.fma.f32(float %68, float 0x3FC5FEBD20000000, float %94) > %99 = bitcast float %44 to i32 > %100 = bitcast float %36 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = extractelement <4 x float> %103, i32 3 > %108 = bitcast float %46 to i32 > %109 = bitcast float %36 to i32 > %110 = insertelement <2 x i32> undef, i32 %108, i32 0 > %111 = insertelement <2 x i32> %110, i32 %109, i32 1 > %112 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %111, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %113 = extractelement <4 x float> %112, i32 0 > %114 = extractelement <4 x float> %112, i32 1 > %115 = extractelement <4 x float> %112, i32 2 > %116 = extractelement <4 x float> %112, i32 3 > %117 = call float @llvm.fma.f32(float %104, float 0x3FC5FEBD20000000, float %95) > %118 = call float @llvm.fma.f32(float %105, float 0x3FC5FEBD20000000, float %96) > %119 = call float @llvm.fma.f32(float %106, float 0x3FC5FEBD20000000, float %97) > %120 = call float @llvm.fma.f32(float %107, float 0x3FC5FEBD20000000, float %98) > %121 = call float @llvm.fma.f32(float %113, float 0x3FC73AB6A0000000, float %117) > %122 = call float @llvm.fma.f32(float %114, float 0x3FC73AB6A0000000, float %118) > %123 = call float @llvm.fma.f32(float %115, float 0x3FC73AB6A0000000, float %119) > %124 = call float @llvm.fma.f32(float %116, float 0x3FC73AB6A0000000, float %120) > %125 = bitcast float %43 to i32 > %126 = bitcast float %36 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 0 > %131 = extractelement <4 x float> %129, i32 1 > %132 = extractelement <4 x float> %129, i32 2 > %133 = extractelement <4 x float> %129, i32 3 > %134 = bitcast float %45 to i32 > %135 = bitcast float %36 to i32 > %136 = insertelement <2 x i32> undef, i32 %134, i32 0 > %137 = insertelement <2 x i32> %136, i32 %135, i32 1 > %138 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %137, <8 x i32> %27, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %139 = 
extractelement <4 x float> %138, i32 0 > %140 = extractelement <4 x float> %138, i32 1 > %141 = extractelement <4 x float> %138, i32 2 > %142 = extractelement <4 x float> %138, i32 3 > %143 = call float @llvm.fma.f32(float %130, float 0x3FBA8AA3E0000000, float %121) > %144 = call float @llvm.fma.f32(float %131, float 0x3FBA8AA3E0000000, float %122) > %145 = call float @llvm.fma.f32(float %132, float 0x3FBA8AA3E0000000, float %123) > %146 = call float @llvm.fma.f32(float %133, float 0x3FBA8AA3E0000000, float %124) > %147 = call float @llvm.fma.f32(float %139, float 0x3FA604E9A0000000, float %143) > %148 = call float @llvm.fma.f32(float %140, float 0x3FA604E9A0000000, float %144) > %149 = call float @llvm.fma.f32(float %141, float 0x3FA604E9A0000000, float %145) > %150 = call float @llvm.fma.f32(float %142, float 0x3FA604E9A0000000, float %146) > %151 = bitcast float %5 to i32 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %151, 10 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %147, 11 > %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %148, 12 > %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float %149, 13 > %156 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155, float %150, 14 > %157 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %156, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %157 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.5481, 0.5481} >IMM[2] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0], CONST[1][5].xyxy, IMM[1].xxxx > 1: RCP TEMP[1].xz, CONST[1][5].xxxx > 2: RCP TEMP[1].yw, 
CONST[1][5].yyyy > 3: UCMP TEMP[0], TEMP[0], IMM[1].yyyy, TEMP[1] > 4: FMA TEMP[1], TEMP[0].zwzw, IMM[1].zzwz, IN[0].xyxy > 5: FMA TEMP[0], TEMP[0], IMM[1].zwww, IN[0].xyxy > 6: MOV TEMP[2].xy, TEMP[1].zwww > 7: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 8: MUL TEMP[2], TEMP[2], IMM[2].xxxx > 9: MOV TEMP[1].xy, TEMP[1].xyyy > 10: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 11: FMA TEMP[1], TEMP[1], IMM[2].xxxx, TEMP[2] > 12: MOV TEMP[2].xy, TEMP[0].xyyy > 13: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 14: MOV TEMP[0].xy, TEMP[0].zwww > 15: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 16: FMA TEMP[1], TEMP[2], IMM[2].xxxx, TEMP[1] > 17: FMA TEMP[0], TEMP[0], IMM[2].xxxx, TEMP[1] > 18: MOV OUT[0], TEMP[0] > 19: END >radeonsi: Compiling shader 92 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %38 = fcmp oeq float %25, 0.000000e+00 > %39 = fcmp oeq float %26, 0.000000e+00 > %40 = fcmp oeq float %25, 0.000000e+00 > %41 = fcmp oeq float %26, 0.000000e+00 > %42 = fdiv float 1.000000e+00, %25 > %43 = fdiv float 1.000000e+00, %26 > %44 = select i1 %38, float 0x4600000000000000, float %42 > %45 = select i1 %39, float 0x4600000000000000, float %43 > %46 = select i1 %40, float 0x4600000000000000, float %42 > %47 = select i1 %41, float 0x4600000000000000, float %43 > %48 = call float @llvm.fma.f32(float %46, float 0xBFE18A5720000000, float %36) > %49 = call float @llvm.fma.f32(float %47, float 0xBFE18A5720000000, float %37) > %50 = call float @llvm.fma.f32(float %46, float 0x3FE18A5720000000, float %36) > %51 = call float @llvm.fma.f32(float %47, float 0xBFE18A5720000000, float %37) > %52 = call float @llvm.fma.f32(float %44, float 0xBFE18A5720000000, float %36) > %53 = call float @llvm.fma.f32(float %45, float 0x3FE18A5720000000, float %37) > %54 = call float @llvm.fma.f32(float %46, float 0x3FE18A5720000000, float %36) > %55 = call float @llvm.fma.f32(float %47, float 0x3FE18A5720000000, float %37) > %56 = bitcast float %50 to i32 > %57 = bitcast float %51 to 
i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %61, 2.500000e-01 > %66 = fmul float %62, 2.500000e-01 > %67 = fmul float %63, 2.500000e-01 > %68 = fmul float %64, 2.500000e-01 > %69 = bitcast float %48 to i32 > %70 = bitcast float %49 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = call float @llvm.fma.f32(float %74, float 2.500000e-01, float %65) > %79 = call float @llvm.fma.f32(float %75, float 2.500000e-01, float %66) > %80 = call float @llvm.fma.f32(float %76, float 2.500000e-01, float %67) > %81 = call float @llvm.fma.f32(float %77, float 2.500000e-01, float %68) > %82 = bitcast float %52 to i32 > %83 = bitcast float %53 to i32 > %84 = insertelement <2 x i32> undef, i32 %82, i32 0 > %85 = insertelement <2 x i32> %84, i32 %83, i32 1 > %86 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %85, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %87 = extractelement <4 x float> %86, i32 0 > %88 = extractelement <4 x float> %86, i32 1 > %89 = extractelement <4 x float> %86, i32 2 > %90 = extractelement <4 x float> %86, i32 3 > %91 = bitcast float %54 to i32 > %92 = bitcast float %55 to i32 > %93 = insertelement <2 x i32> undef, i32 %91, i32 0 > %94 = insertelement <2 x i32> %93, i32 %92, i32 1 > %95 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %94, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %96 = extractelement <4 x float> %95, i32 0 > %97 = extractelement <4 x float> %95, i32 1 > %98 = extractelement <4 x float> %95, i32 2 > %99 = extractelement <4 x float> %95, i32 3 > %100 = call float @llvm.fma.f32(float %87, float 2.500000e-01, float %78) > %101 = call float @llvm.fma.f32(float %88, float 2.500000e-01, float %79) > %102 = call float @llvm.fma.f32(float %89, float 2.500000e-01, float %80) > %103 = call float @llvm.fma.f32(float %90, float 2.500000e-01, float %81) > %104 = call float @llvm.fma.f32(float %96, float 2.500000e-01, float %100) > %105 = call float @llvm.fma.f32(float %97, float 2.500000e-01, float %101) > %106 = call float @llvm.fma.f32(float %98, float 2.500000e-01, float %102) > %107 = call float @llvm.fma.f32(float %99, float 2.500000e-01, float %103) > %108 = bitcast float %5 to i32 > %109 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %108, 10 > %110 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %109, float %104, 11 > %111 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %110, float %105, 12 > %112 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %111, float %106, 13 > %113 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %112, float %107, 14 > %114 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %113, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %114 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..6] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} >IMM[1] UINT32 {0, 96, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: MOV TEMP[0].zw, IMM[0].yyxy > 2: MUL TEMP[1].xy, CONST[1][6].xyyy, IMM[0].yzzz > 3: FMA TEMP[1].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1][6].xyyy > 4: MOV OUT[1], TEMP[1] > 5: MOV OUT[0], TEMP[0] > 6: END >radeonsi: Compiling shader 93 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 96) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 100) > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = fsub float -0.000000e+00, %17 > %24 = call float @llvm.fma.f32(float %21, float %16, float %16) > %25 = call float @llvm.fma.f32(float %22, float %23, float %17) > %26 = bitcast i32 %11 to float > %27 = insertvalue <{ float, float, float }> undef, float %26, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float undef, float undef) > call void 
@llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %27 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, -158456325028528675187087900672.0000, -4.0000, -2.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, 0.0000, 2.0000, 4.0000} >IMM[3] FLT32 { 0.2131, 0.1553, 0.2633, 0.2000} >IMM[4] FLT32 { 1.0000, -0.2000, 1.2500, 0.0000} > 0: MUL TEMP[0], CONST[1][5].xxxx, CONST[1][5].yzyz > 1: FSEQ TEMP[1], TEMP[0].zwzw, IMM[1].xxxx > 2: RCP TEMP[2].xz, TEMP[0].zzzz > 3: RCP TEMP[2].yw, TEMP[0].wwww > 4: MUL TEMP[2], IMM[1].zxwx, TEMP[2] > 5: UCMP TEMP[1], TEMP[1], IMM[1].yxyx, TEMP[2] > 6: FSEQ TEMP[2], TEMP[0], IMM[1].xxxx > 7: RCP TEMP[3].x, TEMP[0].xxxx > 8: RCP TEMP[3].y, TEMP[0].yyyy > 9: RCP TEMP[3].z, TEMP[0].zzzz > 10: RCP TEMP[3].w, TEMP[0].wwww > 11: MUL TEMP[3], IMM[2].zywy, TEMP[3] > 12: UCMP TEMP[2], TEMP[2], IMM[2].xyxy, TEMP[3] > 13: ADD TEMP[0], TEMP[2], IN[0].xyxy > 14: ADD TEMP[1], TEMP[1], IN[0].xyxy > 15: MOV TEMP[2].xy, TEMP[1].zwww > 16: TEX TEMP[2].xy, TEMP[2], SAMP[0], 2D > 17: MOV TEMP[3].xy, TEMP[1].xyyy > 18: TEX TEMP[3].xy, TEMP[3], SAMP[0], 2D > 19: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx > 20: FMA TEMP[1].xy, TEMP[3].xyyy, IMM[3].yyyy, TEMP[2].xyyy > 21: MOV TEMP[2].xy, IN[0].xyyy > 22: TEX TEMP[2].xy, TEMP[2], SAMP[0], 2D > 23: FMA TEMP[1].xy, TEMP[2].xyyy, IMM[3].zzzz, TEMP[1].xyyy > 24: MOV TEMP[2].xy, TEMP[0].xyyy > 25: TEX TEMP[2].xy, TEMP[2], SAMP[0], 2D > 26: MOV TEMP[3].xy, TEMP[0].zwww > 27: TEX TEMP[3].xy, TEMP[3], SAMP[0], 2D > 28: FMA TEMP[0].xy, TEMP[2].xyyy, IMM[3].xxxx, TEMP[1].xyyy > 29: FMA TEMP[0].xy, TEMP[3].xyyy, IMM[3].yyyy, TEMP[0].xyyy > 30: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[0].xyyy, IMM[3].wwww > 31: MIN TEMP[1].xy, TEMP[0].xyyy, IMM[4].xxxx > 32: ADD TEMP[0].xy, TEMP[1].xyyy, IMM[4].yyyy > 33: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[4].zzzz > 34: SQRT TEMP[1].x, TEMP[0].xxxx > 35: SQRT TEMP[1].y, TEMP[0].yyyy > 36: MOV TEMP[0].xy, TEMP[1].xyxx > 37: MOV TEMP[0].zw, IMM[1].xxxx > 38: MOV OUT[0], TEMP[0] > 39: END >radeonsi: Compiling shader 94 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 
x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = fmul float %25, %26 > %40 = fmul float %25, %27 > %41 = fmul float %25, %26 > %42 = fmul float %25, %27 > %43 = fcmp oeq float %41, 0.000000e+00 > %44 = fcmp oeq float %42, 0.000000e+00 > %45 = fcmp oeq float %41, 0.000000e+00 > %46 = fcmp oeq float %42, 0.000000e+00 > %47 = fdiv float 1.000000e+00, %41 > %48 = fdiv float 1.000000e+00, %42 > %49 = fmul float %47, -4.000000e+00 > %50 = fmul float %48, 0.000000e+00 > %51 = fmul float %47, -2.000000e+00 > %52 = fmul float %48, 0.000000e+00 > %53 = select i1 %43, float 0xC600000000000000, float %49 > %54 = select i1 %44, float 0.000000e+00, float %50 > %55 = select i1 %45, float 0xC600000000000000, float %51 > %56 = select i1 %46, float 0.000000e+00, float %52 > %57 = fcmp oeq float %39, 0.000000e+00 > %58 = fcmp oeq float %40, 0.000000e+00 > %59 = fcmp oeq float %41, 0.000000e+00 > %60 = fcmp oeq float %42, 0.000000e+00 > %61 = fdiv float 1.000000e+00, %39 > %62 = fdiv float 1.000000e+00, %40 > %63 = fdiv float 1.000000e+00, %41 > %64 = fdiv float 1.000000e+00, %42 > %65 = fmul float %61, 2.000000e+00 > %66 = fmul float %62, 0.000000e+00 > %67 = fmul float %63, 4.000000e+00 > %68 = fmul float %64, 0.000000e+00 > %69 = select i1 %57, float 0x4600000000000000, float %65 > %70 = select i1 %58, float 0.000000e+00, float %66 > %71 = select i1 %59, float 0x4600000000000000, float %67 > %72 = select i1 %60, float 0.000000e+00, float %68 > %73 = fadd float %69, %37 > %74 = fadd float %70, %38 > %75 = fadd float %71, %37 > %76 = fadd float %72, %38 > %77 = fadd float %53, %37 > %78 = fadd float %54, %38 > %79 = fadd float %55, %37 > %80 = fadd float %56, %38 > %81 = bitcast float %79 to i32 > %82 = bitcast float %80 to i32 > %83 = insertelement <2 x i32> undef, i32 %81, i32 0 > %84 = insertelement <2 x i32> %83, i32 %82, i32 1 > %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = bitcast float %77 to i32 > %89 = bitcast float %78 to i32 > %90 = insertelement <2 x i32> undef, i32 %88, i32 0 > %91 = insertelement <2 x i32> %90, i32 %89, i32 1 > %92 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %91, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) 
> %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = fmul float %86, 0x3FCB459720000000 > %96 = fmul float %87, 0x3FCB459720000000 > %97 = call float @llvm.fma.f32(float %93, float 0x3FC3E279C0000000, float %95) > %98 = call float @llvm.fma.f32(float %94, float 0x3FC3E279C0000000, float %96) > %99 = bitcast float %37 to i32 > %100 = bitcast float %38 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = call float @llvm.fma.f32(float %104, float 0x3FD0D956C0000000, float %97) > %107 = call float @llvm.fma.f32(float %105, float 0x3FD0D956C0000000, float %98) > %108 = bitcast float %73 to i32 > %109 = bitcast float %74 to i32 > %110 = insertelement <2 x i32> undef, i32 %108, i32 0 > %111 = insertelement <2 x i32> %110, i32 %109, i32 1 > %112 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %111, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %113 = extractelement <4 x float> %112, i32 0 > %114 = extractelement <4 x float> %112, i32 1 > %115 = bitcast float %75 to i32 > %116 = bitcast float %76 to i32 > %117 = insertelement <2 x i32> undef, i32 %115, i32 0 > %118 = insertelement <2 x i32> %117, i32 %116, i32 1 > %119 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %118, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %120 = extractelement <4 x float> %119, i32 0 > %121 = extractelement <4 x float> %119, i32 1 > %122 = call float @llvm.fma.f32(float %113, float 0x3FCB459720000000, float %106) > %123 = call float @llvm.fma.f32(float %114, float 0x3FCB459720000000, float %107) > %124 = call float @llvm.fma.f32(float %120, float 0x3FC3E279C0000000, float %122) > %125 = call float @llvm.fma.f32(float %121, float 0x3FC3E279C0000000, float %123) > %126 = call float @llvm.fma.f32(float %124, float %124, float 0x3FC99999A0000000) > %127 = call float @llvm.fma.f32(float %125, float %125, float 0x3FC99999A0000000) > %128 = call float @llvm.minnum.f32(float %126, float 1.000000e+00) > %129 = call float @llvm.minnum.f32(float %127, float 1.000000e+00) > %130 = fadd float %128, 0xBFC99999A0000000 > %131 = fadd float %129, 0xBFC99999A0000000 > %132 = fmul float %130, 1.250000e+00 > %133 = fmul float %131, 1.250000e+00 > %134 = call float @llvm.sqrt.f32(float %132) > %135 = call float @llvm.sqrt.f32(float %133) > %136 = bitcast float %5 to i32 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %136, 10 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %134, 11 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %135, 12 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float 0.000000e+00, 13 > %141 = 
insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float 0.000000e+00, 14 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, -158456325028528675187087900672.0000, -4.0000, -2.0000} >IMM[2] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, 4.0000} >IMM[3] FLT32 { 0.2131, 0.2633, 0.1553, 0.2000} >IMM[4] FLT32 { 1.0000, -0.2000, 1.2500, 0.0000} > 0: MUL TEMP[0], CONST[1][5].xxxx, CONST[1][5].yzyz > 1: FSEQ TEMP[1], TEMP[0].zwzw, IMM[1].xxxx > 2: RCP TEMP[2].xz, TEMP[0].zzzz > 3: RCP TEMP[2].yw, TEMP[0].wwww > 4: MUL TEMP[2], IMM[1].xzxw, TEMP[2] > 5: UCMP TEMP[1], TEMP[1], IMM[1].xyxy, TEMP[2] > 6: FSEQ TEMP[2], TEMP[0], IMM[1].xxxx > 7: RCP TEMP[3].x, TEMP[0].xxxx > 8: RCP TEMP[3].y, TEMP[0].yyyy > 9: RCP TEMP[3].z, TEMP[0].zzzz > 10: RCP TEMP[3].w, TEMP[0].wwww > 11: MUL TEMP[3], IMM[2].xzxw, TEMP[3] > 12: UCMP TEMP[2], TEMP[2], IMM[2].xyxy, TEMP[3] > 13: ADD TEMP[0], TEMP[2], IN[0].xyxy > 14: ADD TEMP[1], TEMP[1], IN[0].xyxy > 15: MOV TEMP[2].xy, TEMP[1].zwww > 16: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 17: MUL TEMP[2], TEMP[2], IMM[3].xxxx > 18: MOV TEMP[3].xy, IN[0].xyyy > 19: TEX TEMP[3], TEMP[3], SAMP[0], 2D > 20: MOV TEMP[1].xy, TEMP[1].xyyy > 21: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 22: FMA TEMP[1], TEMP[1], IMM[3].zzzz, TEMP[2] > 23: FMA TEMP[1], TEMP[3], IMM[3].yyyy, TEMP[1] > 24: MOV TEMP[2].xy, TEMP[0].xyyy > 25: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 26: FMA TEMP[1], TEMP[2], IMM[3].xxxx, TEMP[1] > 27: MOV TEMP[2].xy, TEMP[0].zwww > 28: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 29: FMA TEMP[1], TEMP[2], IMM[3].zzzz, TEMP[1] > 30: FMA TEMP[0].xy, TEMP[1].xyyy, TEMP[1].xyyy, IMM[3].wwww > 31: MOV TEMP[1].zw, TEMP[1].wwzw > 32: MIN TEMP[2].xy, TEMP[0].xyyy, IMM[4].xxxx > 33: ADD TEMP[0].xy, TEMP[2].xyyy, IMM[4].yyyy > 34: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[4].zzzz > 35: SQRT TEMP[2].x, TEMP[0].xxxx > 36: SQRT TEMP[2].y, TEMP[0].yyyy > 
37: MOV TEMP[1].xy, TEMP[2].xyxx > 38: MOV OUT[0], TEMP[1] > 39: END >radeonsi: Compiling shader 95 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = fmul float %25, %26 > %40 = fmul float %25, %27 > %41 = fmul float %25, %26 > %42 = fmul float %25, %27 > %43 = fcmp oeq float %41, 0.000000e+00 > %44 = fcmp oeq float %42, 0.000000e+00 > %45 = fcmp oeq float %41, 0.000000e+00 > %46 = fcmp oeq float %42, 0.000000e+00 > %47 = fdiv float 1.000000e+00, %41 > %48 = fdiv float 1.000000e+00, %42 > %49 = fmul float %47, 0.000000e+00 > %50 = fmul float %48, -4.000000e+00 > %51 = fmul float %47, 0.000000e+00 > %52 = fmul float %48, -2.000000e+00 > %53 = select i1 %43, float 0.000000e+00, float %49 > %54 = select i1 %44, float 0xC600000000000000, float %50 > %55 = select i1 %45, float 0.000000e+00, float %51 > %56 = select i1 %46, float 0xC600000000000000, float %52 > %57 = fcmp oeq float %39, 0.000000e+00 > %58 = fcmp oeq float %40, 0.000000e+00 > %59 = fcmp oeq float %41, 0.000000e+00 > %60 = fcmp oeq float %42, 0.000000e+00 > %61 = fdiv float 1.000000e+00, %39 > %62 = fdiv float 1.000000e+00, %40 > %63 = fdiv float 1.000000e+00, %41 > %64 = fdiv float 1.000000e+00, %42 > %65 = fmul float %61, 0.000000e+00 > %66 = fmul float %62, 2.000000e+00 > %67 = fmul float %63, 0.000000e+00 > %68 = fmul float %64, 4.000000e+00 > %69 = select i1 %57, float 0.000000e+00, float %65 > %70 = select i1 %58, float 0x4600000000000000, float %66 > %71 = select i1 %59, float 0.000000e+00, float %67 > %72 = select i1 %60, float 0x4600000000000000, float %68 > %73 = fadd float %69, %37 > %74 = fadd float %70, %38 > %75 = fadd float %71, %37 > %76 = fadd float %72, %38 > %77 = fadd float %53, %37 > %78 = fadd float %54, %38 > %79 = fadd float %55, %37 > %80 = fadd float %56, %38 > %81 = bitcast float %79 to i32 > %82 = bitcast float %80 to i32 > %83 = insertelement <2 x i32> undef, i32 %81, 
i32 0 > %84 = insertelement <2 x i32> %83, i32 %82, i32 1 > %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = fmul float %86, 0x3FCB459720000000 > %91 = fmul float %87, 0x3FCB459720000000 > %92 = fmul float %88, 0x3FCB459720000000 > %93 = fmul float %89, 0x3FCB459720000000 > %94 = bitcast float %37 to i32 > %95 = bitcast float %38 to i32 > %96 = insertelement <2 x i32> undef, i32 %94, i32 0 > %97 = insertelement <2 x i32> %96, i32 %95, i32 1 > %98 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %97, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %99 = extractelement <4 x float> %98, i32 0 > %100 = extractelement <4 x float> %98, i32 1 > %101 = extractelement <4 x float> %98, i32 2 > %102 = extractelement <4 x float> %98, i32 3 > %103 = bitcast float %77 to i32 > %104 = bitcast float %78 to i32 > %105 = insertelement <2 x i32> undef, i32 %103, i32 0 > %106 = insertelement <2 x i32> %105, i32 %104, i32 1 > %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %108 = extractelement <4 x float> %107, i32 0 > %109 = extractelement <4 x float> %107, i32 1 > %110 = extractelement <4 x float> %107, i32 2 > %111 = extractelement <4 x float> %107, i32 3 > %112 = call float @llvm.fma.f32(float %108, float 0x3FC3E279C0000000, float %90) > %113 = call float @llvm.fma.f32(float %109, float 0x3FC3E279C0000000, float %91) > %114 = call float @llvm.fma.f32(float %110, float 0x3FC3E279C0000000, float %92) > %115 = call float @llvm.fma.f32(float %111, float 0x3FC3E279C0000000, float %93) > %116 = call float @llvm.fma.f32(float %99, float 0x3FD0D956C0000000, float %112) > %117 = call float @llvm.fma.f32(float %100, float 0x3FD0D956C0000000, float %113) > %118 = call float @llvm.fma.f32(float %101, float 0x3FD0D956C0000000, float %114) > %119 = call float @llvm.fma.f32(float %102, float 0x3FD0D956C0000000, float %115) > %120 = bitcast float %73 to i32 > %121 = bitcast float %74 to i32 > %122 = insertelement <2 x i32> undef, i32 %120, i32 0 > %123 = insertelement <2 x i32> %122, i32 %121, i32 1 > %124 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %123, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %125 = extractelement <4 x float> %124, i32 0 > %126 = extractelement <4 x float> %124, i32 1 > %127 = extractelement <4 x float> %124, i32 2 > %128 = extractelement <4 x float> %124, i32 3 > %129 = call float @llvm.fma.f32(float %125, float 0x3FCB459720000000, float %116) > %130 = call float @llvm.fma.f32(float %126, float 0x3FCB459720000000, float %117) > %131 = call float @llvm.fma.f32(float %127, float 0x3FCB459720000000, float %118) > %132 = call float @llvm.fma.f32(float %128, float 0x3FCB459720000000, float %119) > %133 = bitcast float %75 to i32 > %134 = bitcast float %76 to i32 > %135 = insertelement <2 x i32> undef, i32 %133, i32 0 > %136 = insertelement <2 x i32> %135, i32 %134, i32 1 > %137 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %136, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %138 = extractelement <4 x float> %137, i32 0 > %139 = extractelement <4 x 
float> %137, i32 1 > %140 = extractelement <4 x float> %137, i32 2 > %141 = extractelement <4 x float> %137, i32 3 > %142 = call float @llvm.fma.f32(float %138, float 0x3FC3E279C0000000, float %129) > %143 = call float @llvm.fma.f32(float %139, float 0x3FC3E279C0000000, float %130) > %144 = call float @llvm.fma.f32(float %140, float 0x3FC3E279C0000000, float %131) > %145 = call float @llvm.fma.f32(float %141, float 0x3FC3E279C0000000, float %132) > %146 = call float @llvm.fma.f32(float %142, float %142, float 0x3FC99999A0000000) > %147 = call float @llvm.fma.f32(float %143, float %143, float 0x3FC99999A0000000) > %148 = call float @llvm.minnum.f32(float %146, float 1.000000e+00) > %149 = call float @llvm.minnum.f32(float %147, float 1.000000e+00) > %150 = fadd float %148, 0xBFC99999A0000000 > %151 = fadd float %149, 0xBFC99999A0000000 > %152 = fmul float %150, 1.250000e+00 > %153 = fmul float %151, 1.250000e+00 > %154 = call float @llvm.sqrt.f32(float %152) > %155 = call float @llvm.sqrt.f32(float %153) > %156 = bitcast float %5 to i32 > %157 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %156, 10 > %158 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %157, float %154, 11 > %159 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %158, float %155, 12 > %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %159, float %144, 13 > %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %145, 14 > %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] 
>DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 96, 112, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][6].xxxx > 4: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[1][7].wwww > 5: MOV TEMP[2].xy, IN[0].xyyy > 6: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D > 7: MAX TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz > 8: FMA TEMP[0].xyz, TEMP[0].xyzz, CONST[1][7].yyyy, TEMP[1].xyzz > 9: MOV TEMP[0].w, CONST[1][7].xxxx > 10: MOV OUT[0], TEMP[0] > 11: END >radeonsi: Compiling shader 96 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 7 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %49 = bitcast float %47 to i32 > %50 = bitcast float %48 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %57, %54 > %59 = fmul float %57, %55 > 
%60 = fmul float %57, %56 > %61 = fmul float %58, %25 > %62 = fmul float %59, %25 > %63 = fmul float %60, %25 > %64 = fmul float %61, %28 > %65 = fmul float %62, %28 > %66 = fmul float %63, %28 > %67 = bitcast float %47 to i32 > %68 = bitcast float %48 to i32 > %69 = insertelement <2 x i32> undef, i32 %67, i32 0 > %70 = insertelement <2 x i32> %69, i32 %68, i32 1 > %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %72 = extractelement <4 x float> %71, i32 0 > %73 = extractelement <4 x float> %71, i32 1 > %74 = extractelement <4 x float> %71, i32 2 > %75 = call float @llvm.maxnum.f32(float %64, float %72) > %76 = call float @llvm.maxnum.f32(float %65, float %73) > %77 = call float @llvm.maxnum.f32(float %66, float %74) > %78 = call float @llvm.fma.f32(float %61, float %27, float %75) > %79 = call float @llvm.fma.f32(float %62, float %27, float %76) > %80 = call float @llvm.fma.f32(float %63, float %27, float %77) > %81 = bitcast float %5 to i32 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %81, 10 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %78, 11 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %79, 12 > %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84, float %80, 13 > %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %26, 14 > %87 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..7] >DCL TEMP[0..14], LOCAL >IMM[0] UINT32 {0, 96, 112, 0} 
>IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[2] INT32 {48, 1, 0, 0} >IMM[3] FLT32 { 0.0208, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0], CONST[1][6] > 1: ADD TEMP[0].xy, -IN[0].xyyy, CONST[1][6].xyyy > 2: MOV TEMP[1], CONST[1][7] > 3: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1][7].xxxx > 4: DP2 TEMP[1].x, TEMP[0].xyyy, TEMP[0].xyyy > 5: SQRT TEMP[1].x, TEMP[1].xxxx > 6: FMA TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy > 7: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 8: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[1].zzzz > 9: MOV TEMP[2], CONST[1][7] > 10: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][7].wwww > 11: MOV TEMP[0].z, TEMP[1].xxxx > 12: MOV TEMP[1].xy, IN[0].xyyy > 13: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 14: MOV TEMP[2].xyz, TEMP[1].xyzx > 15: MOV TEMP[1].xyz, TEMP[1].xyzx > 16: MOV TEMP[3].xy, IN[0].xyxx > 17: MOV TEMP[4], CONST[1][7] > 18: MOV TEMP[0].w, CONST[1][7].yyyy > 19: MOV TEMP[2].w, IMM[1].wwww > 20: BGNLOOP :0 > 21: ISGE TEMP[4].x, TEMP[2].wwww, IMM[2].xxxx > 22: AND TEMP[5].x, TEMP[4].xxxx, IMM[2].yyyy > 23: INEG TEMP[6].x, TEMP[5].xxxx > 24: MOV TEMP[1].w, TEMP[6].xxxx > 25: MOV TEMP[7].x, TEMP[6].xxxx > 26: USNE TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx > 27: UIF TEMP[8].xxxx :0 > 28: BRK > 29: ENDIF > 30: FMA TEMP[3].xy, TEMP[0].xyyy, IMM[3].xxxx, TEMP[3].xyyy > 31: MOV TEMP[9].xy, TEMP[3].xyyy > 32: TEX TEMP[10], TEMP[9], SAMP[0], 2D > 33: MOV TEMP[11].xyz, TEMP[10].xyzx > 34: MOV TEMP[12], CONST[1][7] > 35: MUL TEMP[13].x, TEMP[0].wwww, CONST[1][7].zzzz > 36: MOV TEMP[1].w, TEMP[13].xxxx > 37: FMA TEMP[1].xyz, TEMP[10].xyzz, TEMP[0].wwww, TEMP[1].xyzz > 38: UADD TEMP[14].x, TEMP[2].wwww, IMM[2].yyyy > 39: MOV TEMP[2].w, TEMP[14].xxxx > 40: MOV TEMP[0].w, TEMP[13].xxxx > 41: ENDLOOP :0 > 42: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz > 43: MUL TEMP[0].xyz, TEMP[0].zzzz, TEMP[1].xyzz > 44: MOV TEMP[0].w, IMM[1].zzzz > 45: MOV OUT[0], TEMP[0] > 46: END >radeonsi: Compiling shader 97 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %31 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 > %33 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %34 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %33, i64 0, i64 3 > %35 = load <4 x i32>, <4 x i32> addrspace(2)* %34, align 16, !tbaa !0 > %36 = extractelement <8 x i32> %32, i32 7 > %37 = 
extractelement <4 x i32> %35, i32 0 > %38 = and i32 %37, %36 > %39 = insertelement <4 x i32> %35, i32 %38, i32 0 > %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %42 = fsub float %25, %40 > %43 = fsub float %26, %41 > %44 = fmul float %42, %27 > %45 = fmul float %43, %27 > %46 = fmul float %44, %44 > %47 = fmul float %45, %45 > %48 = fadd float %46, %47 > %49 = call float @llvm.sqrt.f32(float %48) > %50 = call float @llvm.fma.f32(float %49, float 2.000000e+00, float -1.000000e+00) > %51 = call float @llvm.AMDGPU.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) > %52 = fsub float 1.000000e+00, %51 > %53 = fmul float %52, %30 > %54 = bitcast float %40 to i32 > %55 = bitcast float %41 to i32 > %56 = insertelement <2 x i32> undef, i32 %54, i32 0 > %57 = insertelement <2 x i32> %56, i32 %55, i32 1 > %58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %57, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > br label %LOOP > >LOOP: ; preds = %ENDIF, %main_body > %temp13.0 = phi float [ %41, %main_body ], [ %79, %ENDIF ] > %temp12.0 = phi float [ %40, %main_body ], [ %78, %ENDIF ] > %62 = phi i32 [ 0, %main_body ], [ %92, %ENDIF ] > %63 = phi i32 [ 0, %main_body ], [ %92, %ENDIF ] > %temp6.0 = phi float [ %61, %main_body ], [ %91, %ENDIF ] > %temp5.0 = phi float [ %60, %main_body ], [ %90, %ENDIF ] > %temp4.0 = phi float [ %59, %main_body ], [ %89, %ENDIF ] > %temp3.0 = phi float [ %28, %main_body ], [ %88, %ENDIF ] > %64 = icmp sgt i32 %62, 47 > br i1 %64, label %IF, label %ENDIF > >IF: ; preds = %LOOP > %65 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %66 = call float @llvm.AMDGPU.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) > %67 = call float @llvm.AMDGPU.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) > %68 = fmul float %53, %65 > %69 = fmul float %53, %66 > %70 = fmul float %53, %67 > %71 = bitcast float %5 to i32 > %72 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %71, 10 > %73 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %72, float %68, 11 > %74 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %73, float %69, 12 > %75 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %74, float %70, 13 > %76 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %75, float 1.000000e+00, 14 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %76, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float }> %77 > >ENDIF: ; preds = %LOOP > %78 = call float @llvm.fma.f32(float %44, float 0x3F95555560000000, float %temp12.0) > %79 = call float @llvm.fma.f32(float %45, float 0x3F95555560000000, float %temp13.0) > %80 = bitcast float %78 to i32 > %81 = bitcast float %79 to i32 > %82 = insertelement <2 x i32> undef, i32 %80, i32 0 > %83 = insertelement <2 x i32> %82, i32 %81, i32 1 > %84 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %83, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = fmul float %temp3.0, %29 > %89 = call float @llvm.fma.f32(float %85, float %temp3.0, float %temp4.0) > %90 = call float @llvm.fma.f32(float %86, float %temp3.0, float %temp5.0) > %91 = call float @llvm.fma.f32(float %87, float %temp3.0, float %temp6.0) > %92 = add i32 %63, 1 > br label %LOOP >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..6] >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 96, 80, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: FSLT TEMP[1].x, IMM[0].xxxx, TEMP[0].xxxx > 3: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 4: INEG TEMP[1].x, TEMP[1].xxxx > 5: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 6: UIF TEMP[1].xxxx :0 > 7: MOV TEMP[1].x, TEMP[0].xxxx > 8: ELSE :0 > 9: MOV TEMP[1].x, CONST[1][6].yyyy > 10: ENDIF > 11: ADD TEMP[0].x, -TEMP[1].xxxx, CONST[1][5].yyyy > 12: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][5].zzzz > 13: MOV_SAT TEMP[0].x, TEMP[0].xxxx > 14: MOV TEMP[0].w, TEMP[0].xxxx > 15: MOV TEMP[0].xyz, IMM[0].xxxx > 16: MOV OUT[0], TEMP[0] > 17: END >radeonsi: Compiling shader 98 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, 
float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = bitcast float %37 to i32 > %40 = bitcast float %38 to i32 > %41 = insertelement <2 x i32> undef, i32 %39, i32 0 > %42 = insertelement <2 x i32> %41, i32 %40, i32 1 > %43 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %42, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %44 = extractelement <4 x float> %43, i32 0 > %45 = fcmp ogt float %44, 0.000000e+00 > %. = select i1 %45, float %44, float %27 > %46 = fsub float %25, %. > %47 = fmul float %46, %26 > %48 = call float @llvm.AMDGPU.clamp.(float %47, float 0.000000e+00, float 1.000000e+00) > %49 = bitcast float %5 to i32 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %49, 10 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float 0.000000e+00, 11 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float 0.000000e+00, 12 > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float 0.000000e+00, 13 > %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %48, 14 > %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float 
@llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, -158456325028528675187087900672.0000, -2.2477, -0.6156} >IMM[2] FLT32 { 0.3521, 0.1479, 0.0000, 158456325028528675187087900672.0000} >IMM[3] FLT32 { 0.0000, 0.6156, 2.2477, 0.0000} > 0: FSEQ TEMP[0], CONST[1][5].xyxy, IMM[1].xxxx > 1: RCP TEMP[1].xz, CONST[1][5].xxxx > 2: RCP TEMP[1].yw, CONST[1][5].yyyy > 3: MUL TEMP[1], IMM[1].xzxw, TEMP[1] > 4: UCMP TEMP[0], TEMP[0], IMM[1].xyxy, TEMP[1] > 5: ADD TEMP[0], TEMP[0], IN[0].xyxy > 6: MOV TEMP[1].xy, TEMP[0].zwww > 7: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 8: MUL TEMP[1], TEMP[1], IMM[2].xxxx > 9: MOV TEMP[0].xy, TEMP[0].xyyy > 10: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 11: FMA TEMP[0], TEMP[0], IMM[2].yyyy, TEMP[1] > 12: FSEQ TEMP[2], CONST[1][5].xyxy, IMM[1].xxxx > 13: RCP TEMP[3].xz, CONST[1][5].xxxx > 14: RCP TEMP[3].yw, CONST[1][5].yyyy > 15: MUL TEMP[3], IMM[3].xyxz, TEMP[3] > 16: UCMP TEMP[2], TEMP[2], IMM[2].zwzw, TEMP[3] > 17: ADD TEMP[1], TEMP[2], IN[0].xyxy > 18: MOV TEMP[2].xy, TEMP[1].xyyy > 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 20: MOV TEMP[1].xy, TEMP[1].zwww > 21: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 22: FMA TEMP[0], TEMP[2], IMM[2].xxxx, TEMP[0] > 23: FMA TEMP[0], TEMP[1], IMM[2].yyyy, TEMP[0] > 24: MOV OUT[0], TEMP[0] > 25: END >radeonsi: Compiling shader 99 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 
0, i32 %6, <2 x i32> %8) > %38 = fcmp oeq float %25, 0.000000e+00 > %39 = fcmp oeq float %26, 0.000000e+00 > %40 = fcmp oeq float %25, 0.000000e+00 > %41 = fcmp oeq float %26, 0.000000e+00 > %42 = fdiv float 1.000000e+00, %25 > %43 = fdiv float 1.000000e+00, %26 > %44 = fmul float %42, 0.000000e+00 > %45 = fmul float %43, 0xC001FB3720000000 > %46 = fmul float %42, 0.000000e+00 > %47 = fmul float %43, 0xBFE3B30460000000 > %48 = select i1 %38, float 0.000000e+00, float %44 > %49 = select i1 %39, float 0xC600000000000000, float %45 > %50 = select i1 %40, float 0.000000e+00, float %46 > %51 = select i1 %41, float 0xC600000000000000, float %47 > %52 = fadd float %48, %36 > %53 = fadd float %49, %37 > %54 = fadd float %50, %36 > %55 = fadd float %51, %37 > %56 = bitcast float %54 to i32 > %57 = bitcast float %55 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %61, 0x3FD6889560000000 > %66 = fmul float %62, 0x3FD6889560000000 > %67 = fmul float %63, 0x3FD6889560000000 > %68 = fmul float %64, 0x3FD6889560000000 > %69 = bitcast float %52 to i32 > %70 = bitcast float %53 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = call float @llvm.fma.f32(float %74, float 0x3FC2EED540000000, float %65) > %79 = call float @llvm.fma.f32(float %75, float 0x3FC2EED540000000, float %66) > %80 = call float @llvm.fma.f32(float %76, float 0x3FC2EED540000000, float %67) > %81 = call float @llvm.fma.f32(float %77, float 0x3FC2EED540000000, float %68) > %82 = fcmp oeq float %25, 0.000000e+00 > %83 = fcmp oeq float %26, 0.000000e+00 > %84 = fcmp oeq float %25, 0.000000e+00 > %85 = fcmp oeq float %26, 0.000000e+00 > %86 = fdiv float 1.000000e+00, %25 > %87 = fdiv float 1.000000e+00, %26 > %88 = fmul float %86, 0.000000e+00 > %89 = fmul float %87, 0x3FE3B30460000000 > %90 = fmul float %86, 0.000000e+00 > %91 = fmul float %87, 0x4001FB3720000000 > %92 = select i1 %82, float 0.000000e+00, float %88 > %93 = select i1 %83, float 0x4600000000000000, float %89 > %94 = select i1 %84, float 0.000000e+00, float %90 > %95 = select i1 %85, float 0x4600000000000000, float %91 > %96 = fadd float %92, %36 > %97 = fadd float %93, %37 > %98 = fadd float %94, %36 > %99 = fadd float %95, %37 > %100 = bitcast float %96 to i32 > %101 = bitcast float %97 to i32 > %102 = insertelement <2 x i32> undef, i32 %100, i32 0 > %103 = insertelement <2 x i32> %102, i32 %101, i32 1 > %104 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %103, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %105 = extractelement <4 x float> %104, i32 0 > %106 = extractelement <4 x float> %104, i32 1 > %107 = extractelement <4 x float> %104, i32 2 > %108 = extractelement <4 x float> %104, i32 3 > %109 = 
bitcast float %98 to i32 > %110 = bitcast float %99 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = extractelement <4 x float> %113, i32 1 > %116 = extractelement <4 x float> %113, i32 2 > %117 = extractelement <4 x float> %113, i32 3 > %118 = call float @llvm.fma.f32(float %105, float 0x3FD6889560000000, float %78) > %119 = call float @llvm.fma.f32(float %106, float 0x3FD6889560000000, float %79) > %120 = call float @llvm.fma.f32(float %107, float 0x3FD6889560000000, float %80) > %121 = call float @llvm.fma.f32(float %108, float 0x3FD6889560000000, float %81) > %122 = call float @llvm.fma.f32(float %114, float 0x3FC2EED540000000, float %118) > %123 = call float @llvm.fma.f32(float %115, float 0x3FC2EED540000000, float %119) > %124 = call float @llvm.fma.f32(float %116, float 0x3FC2EED540000000, float %120) > %125 = call float @llvm.fma.f32(float %117, float 0x3FC2EED540000000, float %121) > %126 = bitcast float %5 to i32 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %126, 10 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %122, 11 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %123, 12 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %124, 13 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %125, 14 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 
2D, FLOAT >DCL CONST[1][0..5] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 80, 0, 0} >IMM[1] FLT32 { 0.0000, -158456325028528675187087900672.0000, -2.2477, -0.6156} >IMM[2] FLT32 { 0.3521, 0.1479, 158456325028528675187087900672.0000, 0.0000} >IMM[3] FLT32 { 0.6156, 0.0000, 2.2477, 0.0000} > 0: FSEQ TEMP[0], CONST[1][5].xyxy, IMM[1].xxxx > 1: RCP TEMP[1].xz, CONST[1][5].xxxx > 2: RCP TEMP[1].yw, CONST[1][5].yyyy > 3: MUL TEMP[1], IMM[1].zxwx, TEMP[1] > 4: UCMP TEMP[0], TEMP[0], IMM[1].yxyx, TEMP[1] > 5: ADD TEMP[0], TEMP[0], IN[0].xyxy > 6: MOV TEMP[1].xy, TEMP[0].zwww > 7: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 8: MUL TEMP[1], TEMP[1], IMM[2].xxxx > 9: MOV TEMP[0].xy, TEMP[0].xyyy > 10: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 11: FMA TEMP[0], TEMP[0], IMM[2].yyyy, TEMP[1] > 12: FSEQ TEMP[2], CONST[1][5].xyxy, IMM[1].xxxx > 13: RCP TEMP[3].xz, CONST[1][5].xxxx > 14: RCP TEMP[3].yw, CONST[1][5].yyyy > 15: MUL TEMP[3], IMM[3].xyzy, TEMP[3] > 16: UCMP TEMP[2], TEMP[2], IMM[2].zwzw, TEMP[3] > 17: ADD TEMP[1], TEMP[2], IN[0].xyxy > 18: MOV TEMP[2].xy, TEMP[1].xyyy > 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 20: MOV TEMP[1].xy, TEMP[1].zwww > 21: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 22: FMA TEMP[0], TEMP[2], IMM[2].xxxx, TEMP[0] > 23: FMA TEMP[0], TEMP[1], IMM[2].yyyy, TEMP[0] > 24: MOV OUT[0], TEMP[0] > 25: END >radeonsi: Compiling shader 100 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %38 = fcmp oeq float %25, 0.000000e+00 > %39 = fcmp oeq float %26, 0.000000e+00 > %40 = fcmp oeq float %25, 0.000000e+00 > %41 = fcmp oeq float %26, 0.000000e+00 > %42 = fdiv float 1.000000e+00, %25 > %43 = fdiv float 1.000000e+00, %26 > %44 = fmul float %42, 0xC001FB3720000000 > %45 = fmul float %43, 0.000000e+00 > %46 = fmul float %42, 0xBFE3B30460000000 > %47 = fmul float %43, 0.000000e+00 > %48 = select i1 %38, float 0xC600000000000000, float %44 > %49 = select i1 %39, float 0.000000e+00, float %45 > %50 = select i1 %40, float 0xC600000000000000, 
float %46 > %51 = select i1 %41, float 0.000000e+00, float %47 > %52 = fadd float %48, %36 > %53 = fadd float %49, %37 > %54 = fadd float %50, %36 > %55 = fadd float %51, %37 > %56 = bitcast float %54 to i32 > %57 = bitcast float %55 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %61, 0x3FD6889560000000 > %66 = fmul float %62, 0x3FD6889560000000 > %67 = fmul float %63, 0x3FD6889560000000 > %68 = fmul float %64, 0x3FD6889560000000 > %69 = bitcast float %52 to i32 > %70 = bitcast float %53 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = call float @llvm.fma.f32(float %74, float 0x3FC2EED540000000, float %65) > %79 = call float @llvm.fma.f32(float %75, float 0x3FC2EED540000000, float %66) > %80 = call float @llvm.fma.f32(float %76, float 0x3FC2EED540000000, float %67) > %81 = call float @llvm.fma.f32(float %77, float 0x3FC2EED540000000, float %68) > %82 = fcmp oeq float %25, 0.000000e+00 > %83 = fcmp oeq float %26, 0.000000e+00 > %84 = fcmp oeq float %25, 0.000000e+00 > %85 = fcmp oeq float %26, 0.000000e+00 > %86 = fdiv float 1.000000e+00, %25 > %87 = fdiv float 1.000000e+00, %26 > %88 = fmul float %86, 0x3FE3B30460000000 > %89 = fmul float %87, 0.000000e+00 > %90 = fmul float %86, 0x4001FB3720000000 > %91 = fmul float %87, 0.000000e+00 > %92 = select i1 %82, float 0x4600000000000000, float %88 > %93 = select i1 %83, float 0.000000e+00, float %89 > %94 = select i1 %84, float 0x4600000000000000, float %90 > %95 = select i1 %85, float 0.000000e+00, float %91 > %96 = fadd float %92, %36 > %97 = fadd float %93, %37 > %98 = fadd float %94, %36 > %99 = fadd float %95, %37 > %100 = bitcast float %96 to i32 > %101 = bitcast float %97 to i32 > %102 = insertelement <2 x i32> undef, i32 %100, i32 0 > %103 = insertelement <2 x i32> %102, i32 %101, i32 1 > %104 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %103, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %105 = extractelement <4 x float> %104, i32 0 > %106 = extractelement <4 x float> %104, i32 1 > %107 = extractelement <4 x float> %104, i32 2 > %108 = extractelement <4 x float> %104, i32 3 > %109 = bitcast float %98 to i32 > %110 = bitcast float %99 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = extractelement <4 x float> %113, i32 1 > %116 = extractelement <4 x float> %113, i32 2 > %117 = extractelement <4 x float> %113, i32 3 > %118 = call float @llvm.fma.f32(float %105, float 0x3FD6889560000000, 
float %78) > %119 = call float @llvm.fma.f32(float %106, float 0x3FD6889560000000, float %79) > %120 = call float @llvm.fma.f32(float %107, float 0x3FD6889560000000, float %80) > %121 = call float @llvm.fma.f32(float %108, float 0x3FD6889560000000, float %81) > %122 = call float @llvm.fma.f32(float %114, float 0x3FC2EED540000000, float %118) > %123 = call float @llvm.fma.f32(float %115, float 0x3FC2EED540000000, float %119) > %124 = call float @llvm.fma.f32(float %116, float 0x3FC2EED540000000, float %120) > %125 = call float @llvm.fma.f32(float %117, float 0x3FC2EED540000000, float %121) > %126 = bitcast float %5 to i32 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %126, 10 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %122, 11 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %123, 12 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %124, 13 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %125, 14 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..1] >DCL TEMP[0..10], LOCAL >IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {1, 0, 0, 0} > 0: MOV TEMP[0], IMM[0].xxxx > 1: MOV TEMP[1], CONST[1][0] > 2: MOV TEMP[1].x, -CONST[1][0].xxxx > 3: BGNLOOP :0 > 4: MOV TEMP[2], CONST[1][0] > 5: FSLT TEMP[3].x, CONST[1][0].xxxx, TEMP[1].xxxx > 6: AND TEMP[4].x, TEMP[3].xxxx, IMM[2].xxxx > 7: INEG TEMP[5].x, TEMP[4].xxxx > 8: MOV TEMP[1].y, TEMP[5].xxxx > 9: MOV TEMP[6].x, TEMP[5].xxxx > 10: USNE 
TEMP[7].x, TEMP[5].xxxx, IMM[1].xxxx > 11: UIF TEMP[7].xxxx :0 > 12: BRK > 13: ENDIF > 14: MOV TEMP[8], CONST[1][1] > 15: FMA TEMP[2].xy, TEMP[1].xxxx, CONST[1][1].xyyy, IN[2].xyyy > 16: MOV TEMP[1].yz, TEMP[2].yxyy > 17: MOV TEMP[9].xy, TEMP[2].xyyy > 18: MOV TEMP[9].w, IMM[0].xxxx > 19: TXL TEMP[10], TEMP[9], SAMP[0], 2D > 20: ADD TEMP[0], TEMP[0], TEMP[10] > 21: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 22: ENDLOOP :0 > 23: MUL TEMP[0], TEMP[0], CONST[1][0].wwww > 24: MOV TEMP[1].xyz, IN[1].xyzx > 25: MOV TEMP[1].w, IMM[0].yyyy > 26: MUL TEMP[0], TEMP[0], TEMP[1] > 27: MUL TEMP[0], TEMP[0], IN[1].wwww > 28: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 29: MOV OUT[0], TEMP[0] > 30: END >radeonsi: Compiling shader 101 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %48 = fsub float -0.000000e+00, %25 > br label %LOOP > >LOOP: ; preds = %ENDIF, %main_body > %temp4.0 = phi float [ %48, %main_body ], [ %88, %ENDIF ] > %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %87, %ENDIF ] > %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %86, %ENDIF ] > %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %85, %ENDIF ] > %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %84, %ENDIF ] > %49 = fcmp olt float %25, 
%temp4.0 > br i1 %49, label %IF, label %ENDIF > >IF: ; preds = %LOOP > %50 = fmul float %temp.0, %26 > %51 = fmul float %temp1.0, %26 > %52 = fmul float %temp2.0, %26 > %53 = fmul float %temp3.0, %26 > %54 = fmul float %50, %42 > %55 = fmul float %51, %43 > %56 = fmul float %52, %44 > %57 = fmul float %54, %45 > %58 = fmul float %55, %45 > %59 = fmul float %56, %45 > %60 = fmul float %53, %45 > %61 = call float @llvm.fma.f32(float %38, float %60, float %57) > %62 = call float @llvm.fma.f32(float %39, float %60, float %58) > %63 = call float @llvm.fma.f32(float %40, float %60, float %59) > %64 = call float @llvm.fma.f32(float %41, float %60, float %60) > %65 = bitcast float %5 to i32 > %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %65, 10 > %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %61, 11 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %62, 12 > %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %63, 13 > %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %64, 14 > %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71 > >ENDIF: ; preds = %LOOP > %72 = call float @llvm.fma.f32(float %temp4.0, float %27, float %46) > %73 = call float @llvm.fma.f32(float %temp4.0, float %28, float %47) > %74 = bitcast float %72 to i32 > %75 = bitcast float %73 to i32 > %76 = insertelement <4 x i32> undef, i32 %74, i32 0 > %77 = insertelement <4 x i32> %76, i32 %75, i32 1 > %78 = insertelement <4 x i32> %77, i32 0, i32 2 > %79 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %78, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = extractelement <4 x float> %79, i32 3 > %84 = fadd float %temp.0, %80 > %85 = fadd float %temp1.0, %81 > %86 = fadd float %temp2.0, %82 > %87 = fadd float %temp3.0, %83 > %88 = fadd float %temp4.0, 1.000000e+00 > br label %LOOP >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER 
KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..4095] >DCL TEMP[0..3], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 30, 4, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: BFI TEMP[0].x, IMM[0].xxxx, IN[1].xxxx, IMM[0].xxxx, IMM[0].yyyy > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 3: F2U TEMP[1].xy, TEMP[0].xyyy > 4: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz > 6: UARL ADDR[0].x, TEMP[3].xxxx > 7: MOV TEMP[2], CONST[1][ADDR[0].x] > 8: DP4 TEMP[2].x, IN[0], TEMP[2] > 9: UMUL TEMP[1].x, TEMP[1].yyyy, IMM[2].yyyy > 10: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 11: UARL ADDR[0].x, TEMP[3].xxxx > 12: MOV TEMP[1], CONST[1][ADDR[0].x] > 13: DP4 TEMP[1].x, IN[0], TEMP[1] > 14: MOV TEMP[2].y, TEMP[1].xxxx > 15: SHL TEMP[1].x, IN[1].xxxx, IMM[0].xxxx > 16: U2F TEMP[0].x, TEMP[1].xxxx > 17: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 18: F2U TEMP[0].xy, TEMP[0].xyyy > 19: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 20: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 21: UARL ADDR[0].x, TEMP[3].xxxx > 22: MOV TEMP[1], CONST[1][ADDR[0].x] > 23: DP4 TEMP[1].x, IN[0], TEMP[1] > 24: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 25: USHR TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz > 26: UARL ADDR[0].x, TEMP[3].xxxx > 27: MOV TEMP[0], CONST[1][ADDR[0].x] > 28: DP4 TEMP[0].x, IN[0], TEMP[0] > 29: MOV TEMP[1].y, TEMP[0].xxxx > 30: MOV TEMP[1].zw, IMM[1].wwzw > 31: MOV OUT[1], TEMP[2] > 32: MOV OUT[0], TEMP[1] > 33: END >radeonsi: Compiling shader 102 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = bitcast float %27 to i32 > %29 = shl i32 %28, 2 > %30 = or i32 %29, 2 > %31 = uitofp i32 %30 to float > %32 = fadd float %31, 0x3FB99999A0000000 > %33 = fadd float %31, 0x3FF19999A0000000 > %34 = fptoui float %32 to i32 > %35 = fptoui float %33 to i32 > %36 = shl i32 %34, 4 > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %36) > %38 = shl i32 %34, 4 > %39 = or i32 %38, 4 > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %39) > %41 = shl i32 %34, 4 > %42 = or 
i32 %41, 8 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = shl i32 %34, 4 > %45 = or i32 %44, 12 > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %45) > %47 = fmul float %20, %37 > %48 = fmul float %21, %40 > %49 = fadd float %47, %48 > %50 = fmul float %22, %43 > %51 = fadd float %49, %50 > %52 = fmul float %23, %46 > %53 = fadd float %51, %52 > %54 = shl i32 %35, 4 > %55 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %54) > %56 = shl i32 %35, 4 > %57 = or i32 %56, 4 > %58 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %57) > %59 = shl i32 %35, 4 > %60 = or i32 %59, 8 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = shl i32 %35, 4 > %63 = or i32 %62, 12 > %64 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %63) > %65 = fmul float %20, %55 > %66 = fmul float %21, %58 > %67 = fadd float %65, %66 > %68 = fmul float %22, %61 > %69 = fadd float %67, %68 > %70 = fmul float %23, %64 > %71 = fadd float %69, %70 > %72 = bitcast float %27 to i32 > %73 = shl i32 %72, 2 > %74 = uitofp i32 %73 to float > %75 = fadd float %74, 0x3FB99999A0000000 > %76 = fadd float %74, 0x3FF19999A0000000 > %77 = fptoui float %75 to i32 > %78 = fptoui float %76 to i32 > %79 = shl i32 %77, 4 > %80 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %79) > %81 = shl i32 %77, 4 > %82 = or i32 %81, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %82) > %84 = shl i32 %77, 4 > %85 = or i32 %84, 8 > %86 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %85) > %87 = shl i32 %77, 4 > %88 = or i32 %87, 12 > %89 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %88) > %90 = fmul float %20, %80 > %91 = fmul float %21, %83 > %92 = fadd float %90, %91 > %93 = fmul float %22, %86 > %94 = fadd float %92, %93 > %95 = fmul float %23, %89 > %96 = fadd float %94, %95 > %97 = shl i32 %78, 4 > %98 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %97) > %99 = shl i32 %78, 4 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %100) > %102 = shl i32 %78, 4 > %103 = or i32 %102, 8 > %104 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %103) > %105 = shl i32 %78, 4 > %106 = or i32 %105, 12 > %107 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %106) > %108 = fmul float %20, %98 > %109 = fmul float %21, %101 > %110 = fadd float %108, %109 > %111 = fmul float %22, %104 > %112 = fadd float %110, %111 > %113 = fmul float %23, %107 > %114 = fadd float %112, %113 > %115 = bitcast i32 %11 to float > %116 = insertvalue <{ float, float, float }> undef, float %115, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %53, float %71, float %43, float %46) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %114, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %116 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL 
OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..4] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] UINT32 {64, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: DP4 TEMP[1].x, TEMP[0], CONST[1][1] > 3: DP4 TEMP[2].x, TEMP[0], CONST[1][2] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, TEMP[0], CONST[1][3] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, TEMP[0], CONST[1][4] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: ADD TEMP[0].x, TEMP[0].wwww, CONST[1][0].wwww > 10: FMA TEMP[0], CONST[1][0], TEMP[0].xxxx, TEMP[1] > 11: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[0].xyzz > 12: MOV TEMP[1].w, TEMP[0].wwww > 13: MOV OUT[0], TEMP[1] > 14: END >radeonsi: Compiling shader 103 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 3 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = bitcast float %54 to i32 > %57 = bitcast 
float %55 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %61, %29 > %66 = fmul float %62, %30 > %67 = fadd float %65, %66 > %68 = fmul float %63, %31 > %69 = fadd float %67, %68 > %70 = fmul float %64, %32 > %71 = fadd float %69, %70 > %72 = fmul float %61, %33 > %73 = fmul float %62, %34 > %74 = fadd float %72, %73 > %75 = fmul float %63, %35 > %76 = fadd float %74, %75 > %77 = fmul float %64, %36 > %78 = fadd float %76, %77 > %79 = fmul float %61, %37 > %80 = fmul float %62, %38 > %81 = fadd float %79, %80 > %82 = fmul float %63, %39 > %83 = fadd float %81, %82 > %84 = fmul float %64, %40 > %85 = fadd float %83, %84 > %86 = fmul float %61, %41 > %87 = fmul float %62, %42 > %88 = fadd float %86, %87 > %89 = fmul float %63, %43 > %90 = fadd float %88, %89 > %91 = fmul float %64, %44 > %92 = fadd float %90, %91 > %93 = fadd float %64, %28 > %94 = call float @llvm.fma.f32(float %25, float %93, float %71) > %95 = call float @llvm.fma.f32(float %26, float %93, float %78) > %96 = call float @llvm.fma.f32(float %27, float %93, float %85) > %97 = call float @llvm.fma.f32(float %28, float %93, float %92) > %98 = fmul float %97, %94 > %99 = fmul float %97, %95 > %100 = fmul float %97, %96 > %101 = bitcast float %5 to i32 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %101, 10 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %98, 11 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %99, 12 > %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104, float %100, 13 > %106 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105, float %97, 14 > %107 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %107 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind 
readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..4] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] UINT32 {64, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: DP4 TEMP[1].x, TEMP[0], CONST[1][1] > 3: DP4 TEMP[2].x, TEMP[0], CONST[1][2] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, TEMP[0], CONST[1][3] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, TEMP[0], CONST[1][4] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: ADD TEMP[0].x, TEMP[0].wwww, CONST[1][0].wwww > 10: FMA TEMP[0], CONST[1][0], TEMP[0].xxxx, TEMP[1] > 11: MOV OUT[0], TEMP[0] > 12: END >radeonsi: Compiling shader 104 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 72) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 76) > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 3 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = 
extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = bitcast float %54 to i32 > %57 = bitcast float %55 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fmul float %61, %29 > %66 = fmul float %62, %30 > %67 = fadd float %65, %66 > %68 = fmul float %63, %31 > %69 = fadd float %67, %68 > %70 = fmul float %64, %32 > %71 = fadd float %69, %70 > %72 = fmul float %61, %33 > %73 = fmul float %62, %34 > %74 = fadd float %72, %73 > %75 = fmul float %63, %35 > %76 = fadd float %74, %75 > %77 = fmul float %64, %36 > %78 = fadd float %76, %77 > %79 = fmul float %61, %37 > %80 = fmul float %62, %38 > %81 = fadd float %79, %80 > %82 = fmul float %63, %39 > %83 = fadd float %81, %82 > %84 = fmul float %64, %40 > %85 = fadd float %83, %84 > %86 = fmul float %61, %41 > %87 = fmul float %62, %42 > %88 = fadd float %86, %87 > %89 = fmul float %63, %43 > %90 = fadd float %88, %89 > %91 = fmul float %64, %44 > %92 = fadd float %90, %91 > %93 = fadd float %64, %28 > %94 = call float @llvm.fma.f32(float %25, float %93, float %71) > %95 = call float @llvm.fma.f32(float %26, float %93, float %78) > %96 = call float @llvm.fma.f32(float %27, float %93, float %85) > %97 = call float @llvm.fma.f32(float %28, float %93, float %92) > %98 = bitcast float %5 to i32 > %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %98, 10 > %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99, float %94, 11 > %101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float %95, 12 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %96, 13 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %97, 14 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x 
i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..13] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 208, 0, 0} >IMM[1] FLT32 { 1.0000, -0.7141, -0.3441, 0.5312} >IMM[2] FLT32 { 1.0000, 1.4020, -0.7037, 1.7720} >IMM[3] FLT32 { 1.0000, 1.7720, -0.8895, 0.0000} > 0: MUL TEMP[0], IN[0].xyxy, CONST[1][13] > 1: MOV TEMP[1].xy, TEMP[0].zwww > 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 3: MOV TEMP[1].y, TEMP[1].xxxx > 4: MOV TEMP[2].xy, TEMP[0].zwww > 5: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[2].xy, TEMP[0].xyyy > 8: TEX TEMP[2].x, TEMP[2], SAMP[2], 2D > 9: MOV TEMP[1].x, TEMP[2].xxxx > 10: MOV TEMP[1].w, IMM[1].xxxx > 11: DP4 TEMP[2].x, IMM[1], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP3 TEMP[0].x, IMM[2].xyzz, TEMP[1].xyww > 14: DP3 TEMP[1].x, IMM[3].xyzz, TEMP[1].xzww > 15: MOV TEMP[0].z, TEMP[1].xxxx > 16: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xyzz > 17: MOV TEMP[0].w, IN[1].wwww > 18: MOV OUT[0], TEMP[0] > 19: END >radeonsi: Compiling shader 105 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 220) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x 
<8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 7 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 11 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %62 = fmul float %56, %25 > %63 = fmul float %57, %26 > %64 = fmul float %56, %27 > %65 = fmul float %57, %28 > %66 = bitcast float %64 to i32 > %67 = bitcast float %65 to i32 > %68 = insertelement <2 x i32> undef, i32 %66, i32 0 > %69 = insertelement <2 x i32> %68, i32 %67, i32 1 > %70 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %69, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %71 = extractelement <4 x float> %70, i32 0 > %72 = bitcast float %64 to i32 > %73 = bitcast float %65 to i32 > %74 = insertelement <2 x i32> undef, i32 %72, i32 0 > %75 = insertelement <2 x i32> %74, i32 %73, i32 1 > %76 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %75, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %77 = extractelement <4 x float> %76, i32 0 > %78 = bitcast float %62 to i32 > %79 = bitcast float %63 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = fmul float %71, 0xBFE6DA3C20000000 > %85 = fadd float %83, %84 > %86 = fmul float %77, 0xBFD60663C0000000 > %87 = fadd float %85, %86 > %88 = fadd float %87, 0x3FE0FFB6C0000000 > %89 = fmul float %71, 0x3FF66E9780000000 > %90 = fadd float %89, %83 > %91 = fadd float %90, 0xBFE6851CA0000000 > %92 = fmul float %77, 0x3FFC5A1CA0000000 > %93 = fadd float %92, %83 > %94 = fadd float %93, 0xBFEC769340000000 > %95 = fmul float %91, %58 > %96 = fmul float %88, %59 > %97 = fmul float %94, %60 > %98 = bitcast float %5 to i32 > %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %98, 10 > %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99, float %95, 11 > 
%101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float %96, 12 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %97, 13 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %61, 14 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 30, 1, 4} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: MOV TEMP[0].xy, IN[1].xyxx > 1: SHL TEMP[1].x, IN[3].xxxx, IMM[0].xxxx > 2: U2F TEMP[1].x, TEMP[1].xxxx > 3: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 4: BFI TEMP[2].x, IMM[0].xxxx, IN[3].xxxx, IMM[0].xxxx, IMM[0].yyyy > 5: BFI TEMP[3].x, IMM[0].xxxx, IN[3].xxxx, IMM[0].xxxx, IMM[0].yyyy > 6: MOV TEMP[2].y, TEMP[3].xxxx > 7: BFI TEMP[3].x, IMM[0].zzzz, IN[3].xxxx, IMM[0].xxxx, IMM[0].yyyy > 8: MOV TEMP[2].z, TEMP[3].xxxx > 9: U2F TEMP[2].xyz, TEMP[2].xyzz > 10: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xyxx > 11: MOV TEMP[1].yzw, TEMP[2].yxyz > 12: F2U TEMP[1], TEMP[1] > 13: UMUL TEMP[2].x, TEMP[1].wwww, IMM[2].yyyy > 14: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww > 15: UARL ADDR[0].x, TEMP[3].xxxx > 16: MOV TEMP[2], CONST[1][ADDR[0].x] > 17: UMUL TEMP[3].x, TEMP[1].xxxx, IMM[2].yyyy > 18: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww > 19: UARL ADDR[0].x, TEMP[4].xxxx > 20: MOV TEMP[3], CONST[1][ADDR[0].x] > 21: FMA TEMP[2], IN[0], TEMP[2], TEMP[3] > 22: UMUL TEMP[3].x, TEMP[1].yyyy, IMM[2].yyyy > 23: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww > 24: UARL ADDR[0].x, TEMP[4].xxxx > 25: MOV TEMP[3], CONST[1][ADDR[0].x] > 26: DP4 TEMP[3].x, IN[2], TEMP[3] > 27: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy > 28: USHR TEMP[4].x, TEMP[1].xxxx, IMM[0].wwww > 29: UARL ADDR[0].x, TEMP[4].xxxx > 30: MOV TEMP[1], CONST[1][ADDR[0].x] > 31: DP4 TEMP[1].x, IN[2], TEMP[1] > 32: MOV TEMP[3].y, TEMP[1].xxxx > 33: MOV TEMP[3].zw, IMM[1].wwzw > 34: MOV OUT[2], TEMP[2] > 35: MOV OUT[1], TEMP[0] > 36: MOV OUT[0], TEMP[3] > 37: END >radeonsi: Compiling shader 106 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > 
>define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %13) > %22 = extractelement <4 x float> %21, i32 0 > %23 = extractelement <4 x float> %21, i32 1 > %24 = extractelement <4 x float> %21, i32 2 > %25 = extractelement <4 x float> %21, i32 3 > %26 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %14) > %29 = extractelement <4 x float> %28, i32 0 > %30 = extractelement <4 x float> %28, i32 1 > %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 > %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %15) > %34 = extractelement <4 x float> %33, i32 0 > %35 = extractelement <4 x float> %33, i32 1 > %36 = extractelement <4 x float> %33, i32 2 > %37 = extractelement <4 x float> %33, i32 3 > %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %16) > %41 = extractelement <4 x float> %40, i32 0 > %42 = bitcast float %41 to i32 > %43 = shl i32 %42, 2 > %44 = uitofp i32 %43 to float > %45 = fadd float %44, 0x3FB99999A0000000 > %46 = bitcast float %41 to i32 > %47 = shl i32 %46, 2 > %48 = or i32 %47, 2 > %49 = bitcast float %41 to i32 > %50 = shl i32 %49, 2 > %51 = or i32 %50, 2 > %52 = bitcast float %41 to i32 > %53 = shl i32 %52, 2 > %54 = or i32 %53, 1 > %55 = uitofp i32 %48 to float > %56 = uitofp i32 %51 to float > %57 = uitofp i32 %54 to float > %58 = fadd float %55, 0x3FB99999A0000000 > %59 = fadd float %56, 0x3FF19999A0000000 > %60 = fadd float %57, 0x3FB99999A0000000 > %61 = fptoui float %45 to i32 > %62 = fptoui float %58 to i32 > %63 = fptoui float %59 to i32 > %64 = fptoui float %60 to i32 > %65 = shl i32 %64, 4 > %66 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %65) > %67 = shl i32 %64, 4 > %68 = or i32 %67, 4 > %69 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %68) > %70 = shl i32 %64, 4 > %71 = or i32 %70, 8 > %72 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %71) > %73 = shl i32 %64, 4 > %74 = or i32 %73, 12 > %75 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %74) > %76 = shl i32 %61, 4 > %77 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %76) > %78 = shl i32 %61, 4 > %79 = or i32 %78, 4 > %80 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %79) > %81 = shl i32 %61, 4 > %82 = or i32 %81, 8 > %83 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %82) > %84 = shl i32 %61, 4 > %85 = or i32 %84, 12 > %86 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %85) > %87 = call float 
@llvm.fma.f32(float %22, float %66, float %77) > %88 = call float @llvm.fma.f32(float %23, float %69, float %80) > %89 = call float @llvm.fma.f32(float %24, float %72, float %83) > %90 = call float @llvm.fma.f32(float %25, float %75, float %86) > %91 = shl i32 %62, 4 > %92 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %91) > %93 = shl i32 %62, 4 > %94 = or i32 %93, 4 > %95 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %94) > %96 = shl i32 %62, 4 > %97 = or i32 %96, 8 > %98 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %97) > %99 = shl i32 %62, 4 > %100 = or i32 %99, 12 > %101 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %100) > %102 = fmul float %34, %92 > %103 = fmul float %35, %95 > %104 = fadd float %102, %103 > %105 = fmul float %36, %98 > %106 = fadd float %104, %105 > %107 = fmul float %37, %101 > %108 = fadd float %106, %107 > %109 = shl i32 %63, 4 > %110 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %109) > %111 = shl i32 %63, 4 > %112 = or i32 %111, 4 > %113 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %112) > %114 = shl i32 %63, 4 > %115 = or i32 %114, 8 > %116 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %115) > %117 = shl i32 %63, 4 > %118 = or i32 %117, 12 > %119 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %118) > %120 = fmul float %34, %110 > %121 = fmul float %35, %113 > %122 = fadd float %120, %121 > %123 = fmul float %36, %116 > %124 = fadd float %122, %123 > %125 = fmul float %37, %119 > %126 = fadd float %124, %125 > %127 = bitcast i32 %11 to float > %128 = insertvalue <{ float, float, float }> undef, float %127, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %29, float %30, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %88, float %89, float %90) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %108, float %126, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %128 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0], LOCAL > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].wwww > 3: MOV TEMP[0].w, TEMP[0].xxxx > 4: MOV TEMP[0].xyz, IN[1].xyzx > 5: MOV OUT[0], TEMP[0] > 6: END >radeonsi: Compiling shader 107 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* 
byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %38 = bitcast float %32 to i32 > %39 = bitcast float %33 to i32 > %40 = insertelement <2 x i32> undef, i32 %38, i32 0 > %41 = insertelement <2 x i32> %40, i32 %39, i32 1 > %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %43 = extractelement <4 x float> %42, i32 0 > %44 = fmul float %43, %37 > %45 = bitcast float %5 to i32 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %45, 10 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %34, 11 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %35, 12 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %36, 13 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %44, 14 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 
> export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 31, 96, 4} >IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {2, 3, 0, 0} > 0: MOV TEMP[0].xy, IN[1].xyxx > 1: BFI TEMP[1].x, IMM[0].xxxx, IN[3].xxxx, IMM[0].xxxx, IMM[0].yyyy > 2: U2F TEMP[1].x, TEMP[1].xxxx > 3: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 4: SHL TEMP[2].x, IN[3].xxxx, IMM[0].xxxx > 5: U2F TEMP[2].x, TEMP[2].xxxx > 6: ADD TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx > 7: MOV TEMP[1].y, TEMP[2].xxxx > 8: F2U TEMP[2].xy, TEMP[1].xyyy > 9: UADD TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 10: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 11: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww > 12: UARL ADDR[0].x, TEMP[4].xxxx > 13: MOV TEMP[3], CONST[1][ADDR[0].x] > 14: UADD TEMP[2].x, IMM[0].zzzz, TEMP[2].yyyy > 15: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 16: USHR TEMP[4].x, TEMP[2].xxxx, IMM[0].wwww > 17: UARL ADDR[0].x, TEMP[4].xxxx > 18: MOV TEMP[2], CONST[1][ADDR[0].x] > 19: FMA TEMP[2], IN[0], TEMP[3], TEMP[2] > 20: U2F TEMP[1].x, IN[3].xxxx > 21: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 22: F2U TEMP[1].x, TEMP[1].xxxx > 23: SHL TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx > 24: UMUL TEMP[3].x, TEMP[1].xxxx, IMM[2].yyyy > 25: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww > 26: UARL ADDR[0].x, TEMP[4].xxxx > 27: MOV TEMP[3], CONST[1][ADDR[0].x] > 28: DP4 TEMP[3].x, IN[2], TEMP[3] > 29: UADD TEMP[4].x, IMM[0].xxxx, TEMP[1].xxxx > 30: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 31: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].wwww > 32: UARL ADDR[0].x, TEMP[5].xxxx > 33: MOV TEMP[4], CONST[1][ADDR[0].x] > 34: DP4 TEMP[4].x, IN[2], TEMP[4] > 35: MOV TEMP[3].y, TEMP[4].xxxx > 36: UADD TEMP[4].x, IMM[3].xxxx, TEMP[1].xxxx > 37: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 38: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].wwww > 39: UARL ADDR[0].x, TEMP[5].xxxx > 40: MOV TEMP[4], CONST[1][ADDR[0].x] > 41: DP4 TEMP[4].x, IN[2], TEMP[4] > 42: MOV TEMP[3].z, TEMP[4].xxxx > 43: UADD TEMP[1].x, IMM[3].yyyy, TEMP[1].xxxx > 44: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 45: USHR TEMP[4].x, TEMP[1].xxxx, IMM[0].wwww > 46: UARL ADDR[0].x, TEMP[4].xxxx > 47: MOV TEMP[1], CONST[1][ADDR[0].x] > 48: DP4 TEMP[1].x, IN[2], TEMP[1] > 49: MOV TEMP[3].w, TEMP[1].xxxx > 50: MOV OUT[2], TEMP[2] > 51: MOV OUT[1], TEMP[0] > 52: MOV OUT[0], TEMP[3] > 53: END >radeonsi: Compiling shader 108 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %13) > %22 = extractelement <4 x float> %21, i32 0 > %23 = extractelement <4 x float> %21, i32 1 > %24 = extractelement <4 x float> %21, 
i32 2 > %25 = extractelement <4 x float> %21, i32 3 > %26 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %14) > %29 = extractelement <4 x float> %28, i32 0 > %30 = extractelement <4 x float> %28, i32 1 > %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 > %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %15) > %34 = extractelement <4 x float> %33, i32 0 > %35 = extractelement <4 x float> %33, i32 1 > %36 = extractelement <4 x float> %33, i32 2 > %37 = extractelement <4 x float> %33, i32 3 > %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %16) > %41 = extractelement <4 x float> %40, i32 0 > %42 = bitcast float %41 to i32 > %43 = shl i32 %42, 1 > %44 = or i32 %43, 1 > %45 = uitofp i32 %44 to float > %46 = fadd float %45, 0x3FB99999A0000000 > %47 = bitcast float %41 to i32 > %48 = shl i32 %47, 1 > %49 = uitofp i32 %48 to float > %50 = fadd float %49, 0x3FB99999A0000000 > %51 = fptoui float %46 to i32 > %52 = fptoui float %50 to i32 > %53 = shl i32 %51, 4 > %54 = add i32 %53, 1536 > %55 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %54) > %56 = add i32 %53, 1540 > %57 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %56) > %58 = add i32 %53, 1544 > %59 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %58) > %60 = add i32 %53, 1548 > %61 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %60) > %62 = shl i32 %52, 4 > %63 = add i32 %62, 1536 > %64 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %63) > %65 = add i32 %62, 1540 > %66 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %65) > %67 = add i32 %62, 1544 > %68 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %67) > %69 = add i32 %62, 1548 > %70 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %69) > %71 = call float @llvm.fma.f32(float %22, float %55, float %64) > %72 = call float @llvm.fma.f32(float %23, float %57, float %66) > %73 = call float @llvm.fma.f32(float %24, float %59, float %68) > %74 = call float @llvm.fma.f32(float %25, float %61, float %70) > %75 = bitcast float %41 to i32 > %76 = uitofp i32 %75 to float > %77 = fadd float %76, 0x3FB99999A0000000 > %78 = fptoui float %77 to i32 > %79 = shl i32 %78, 6 > %80 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %79) > %81 = shl i32 %78, 6 > %82 = or i32 %81, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %82) > %84 = shl i32 %78, 6 > %85 = or i32 %84, 8 > %86 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %85) > %87 = shl i32 %78, 6 > %88 = or i32 %87, 12 > %89 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %88) > %90 = fmul float %34, %80 > %91 = fmul float %35, %83 > %92 = fadd float %90, %91 > %93 = fmul float %36, %86 > %94 = fadd float %92, %93 > %95 = fmul float %37, %89 > %96 = fadd float %94, %95 > %97 = shl i32 %78, 2 > %98 = and i32 %97, 268435452 > %99 = or i32 %98, 1 > %100 = shl nuw i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %100) > %102 = shl nuw i32 %99, 4 > %103 = or i32 %102, 4 > %104 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %103) > %105 = shl nuw i32 %99, 4 > %106 = or i32 %105, 8 > %107 = 
call float @llvm.SI.load.const(<16 x i8> %18, i32 %106) > %108 = shl nuw i32 %99, 4 > %109 = or i32 %108, 12 > %110 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %109) > %111 = fmul float %34, %101 > %112 = fmul float %35, %104 > %113 = fadd float %111, %112 > %114 = fmul float %36, %107 > %115 = fadd float %113, %114 > %116 = fmul float %37, %110 > %117 = fadd float %115, %116 > %118 = shl i32 %78, 2 > %119 = and i32 %118, 268435452 > %120 = or i32 %119, 2 > %121 = shl nuw i32 %120, 4 > %122 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %121) > %123 = shl nuw i32 %120, 4 > %124 = or i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %124) > %126 = shl nuw i32 %120, 4 > %127 = or i32 %126, 8 > %128 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %127) > %129 = shl nuw i32 %120, 4 > %130 = or i32 %129, 12 > %131 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %130) > %132 = fmul float %34, %122 > %133 = fmul float %35, %125 > %134 = fadd float %132, %133 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %37, %131 > %138 = fadd float %136, %137 > %139 = shl i32 %78, 2 > %140 = and i32 %139, 268435452 > %141 = or i32 %140, 3 > %142 = shl nuw i32 %141, 4 > %143 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %142) > %144 = shl nuw i32 %141, 4 > %145 = or i32 %144, 4 > %146 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %145) > %147 = shl nuw i32 %141, 4 > %148 = or i32 %147, 8 > %149 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %148) > %150 = shl nuw i32 %141, 4 > %151 = or i32 %150, 12 > %152 = call float @llvm.SI.load.const(<16 x i8> %18, i32 %151) > %153 = fmul float %34, %143 > %154 = fmul float %35, %146 > %155 = fadd float %153, %154 > %156 = fmul float %36, %149 > %157 = fadd float %155, %156 > %158 = fmul float %37, %152 > %159 = fadd float %157, %158 > %160 = bitcast i32 %11 to float > %161 = insertvalue <{ float, float, float }> undef, float %160, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %29, float %30, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %71, float %72, float %73, float %74) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %117, float %138, float %159) > ret <{ float, float, float }> %161 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL CONST[1][0..4095] >DCL TEMP[0..2], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 4, 0, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: SHL TEMP[0].x, IN[1].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 3: F2U TEMP[0].xy, TEMP[0].xyyy > 4: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 5: USHR TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy > 6: UARL ADDR[0].x, TEMP[2].xxxx > 7: MOV TEMP[1], CONST[1][ADDR[0].x] > 8: DP4 TEMP[1].x, IN[0], TEMP[1] > 9: UMUL TEMP[0].x, 
TEMP[0].yyyy, IMM[2].yyyy > 10: USHR TEMP[2].x, TEMP[0].xxxx, IMM[0].yyyy > 11: UARL ADDR[0].x, TEMP[2].xxxx > 12: MOV TEMP[0], CONST[1][ADDR[0].x] > 13: DP4 TEMP[0].x, IN[0], TEMP[0] > 14: MOV TEMP[1].y, TEMP[0].xxxx > 15: MOV TEMP[1].zw, IMM[1].wwzw > 16: MOV OUT[0], TEMP[1] > 17: END >radeonsi: Compiling shader 109 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %bc = bitcast <4 x float> %26 to <4 x i32> > %27 = extractelement <4 x i32> %bc, i32 0 > %28 = shl i32 %27, 1 > %29 = uitofp i32 %28 to float > %30 = fadd float %29, 0x3FB99999A0000000 > %31 = fadd float %29, 0x3FF19999A0000000 > %32 = fptoui float %30 to i32 > %33 = fptoui float %31 to i32 > %34 = shl i32 %32, 4 > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %34) > %36 = shl i32 %32, 4 > %37 = or i32 %36, 4 > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %37) > %39 = shl i32 %32, 4 > %40 = or i32 %39, 8 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = shl i32 %32, 4 > %43 = or i32 %42, 12 > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %43) > %45 = fmul float %20, %35 > %46 = fmul float %21, %38 > %47 = fadd float %45, %46 > %48 = fmul float %22, %41 > %49 = fadd float %47, %48 > %50 = fmul float %23, %44 > %51 = fadd float %49, %50 > %52 = shl i32 %33, 4 > %53 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %52) > %54 = shl i32 %33, 4 > %55 = or i32 %54, 4 > %56 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %55) > %57 = shl i32 %33, 4 > %58 = or i32 %57, 8 > %59 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %58) > %60 = shl i32 %33, 4 > %61 = or i32 %60, 12 > %62 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %61) > %63 = fmul float %20, %53 > %64 = fmul float %21, %56 > %65 = fadd float %63, %64 > %66 = fmul float %22, %59 > %67 = fadd float %65, %66 > %68 = fmul float %23, %62 > %69 = fadd float %67, %68 > %70 = bitcast i32 %11 to float > %71 = insertvalue <{ float, float, float }> undef, float %70, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %69, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %71 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x 
i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL OUT[0], COLOR >DCL CONST[1][0] >IMM[0] UINT32 {0, 0, 0, 0} > 0: MOV OUT[0], CONST[1][0] > 1: END >radeonsi: Compiling shader 110 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = bitcast float %5 to i32 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %29, 10 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 11 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 12 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %27, 13 > %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float %28, 14 > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %34, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL 
CONST[1][0..5] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] UINT32 {64, 80, 0, 0} > 0: MOV TEMP[0].xy, IN[1].xyxx > 1: FMA TEMP[1], IN[0], CONST[1][1], CONST[1][0] > 2: DP4 TEMP[2].x, IN[2], CONST[1][2] > 3: DP4 TEMP[3].x, IN[2], CONST[1][3] > 4: MOV TEMP[2].y, TEMP[3].xxxx > 5: DP4 TEMP[3].x, IN[2], CONST[1][4] > 6: MOV TEMP[2].z, TEMP[3].xxxx > 7: DP4 TEMP[3].x, IN[2], CONST[1][5] > 8: MOV TEMP[2].w, TEMP[3].xxxx > 9: MOV OUT[2], TEMP[1] > 10: MOV OUT[1], TEMP[0] > 11: MOV OUT[0], TEMP[2] > 12: END >radeonsi: Compiling shader 111 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 68) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 72) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 76) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 80) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 84) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 88) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 92) > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %13) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = extractelement <4 x float> %44, i32 2 > %48 = extractelement <4 x float> %44, i32 3 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %14) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* 
%54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %15) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = extractelement <4 x float> %56, i32 3 > %61 = call float @llvm.fma.f32(float %45, float %22, float %18) > %62 = call float @llvm.fma.f32(float %46, float %23, float %19) > %63 = call float @llvm.fma.f32(float %47, float %24, float %20) > %64 = call float @llvm.fma.f32(float %48, float %25, float %21) > %65 = fmul float %57, %26 > %66 = fmul float %58, %27 > %67 = fadd float %65, %66 > %68 = fmul float %59, %28 > %69 = fadd float %67, %68 > %70 = fmul float %60, %29 > %71 = fadd float %69, %70 > %72 = fmul float %57, %30 > %73 = fmul float %58, %31 > %74 = fadd float %72, %73 > %75 = fmul float %59, %32 > %76 = fadd float %74, %75 > %77 = fmul float %60, %33 > %78 = fadd float %76, %77 > %79 = fmul float %57, %34 > %80 = fmul float %58, %35 > %81 = fadd float %79, %80 > %82 = fmul float %59, %36 > %83 = fadd float %81, %82 > %84 = fmul float %60, %37 > %85 = fadd float %83, %84 > %86 = fmul float %57, %38 > %87 = fmul float %58, %39 > %88 = fadd float %86, %87 > %89 = fmul float %59, %40 > %90 = fadd float %88, %89 > %91 = fmul float %60, %41 > %92 = fadd float %90, %91 > %93 = bitcast i32 %11 to float > %94 = insertvalue <{ float, float, float }> undef, float %93, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %61, float %62, float %63, float %64) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %71, float %78, float %85, float %92) > ret <{ float, float, float }> %94 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..3] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[1].xyxx > 1: FMA TEMP[1], IN[0], CONST[1][1], CONST[1][0] > 2: DP4 TEMP[2].x, IN[2], CONST[1][2] > 3: DP4 TEMP[3].x, IN[2], CONST[1][3] > 4: MOV TEMP[2].y, TEMP[3].xxxx > 5: MOV TEMP[2].zw, IMM[1].yyxy > 6: MOV OUT[2], TEMP[1] > 7: MOV OUT[1], TEMP[0] > 8: MOV OUT[0], TEMP[2] > 9: END >radeonsi: Compiling shader 112 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, 
<16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) > %37 = extractelement <4 x float> %36, i32 0 > %38 = extractelement <4 x float> %36, i32 1 > %39 = extractelement <4 x float> %36, i32 2 > %40 = extractelement <4 x float> %36, i32 3 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %14) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %15) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = extractelement <4 x float> %48, i32 3 > %53 = call float @llvm.fma.f32(float %37, float %22, float %18) > %54 = call float @llvm.fma.f32(float %38, float %23, float %19) > %55 = call float @llvm.fma.f32(float %39, float %24, float %20) > %56 = call float @llvm.fma.f32(float %40, float %25, float %21) > %57 = fmul float %49, %26 > %58 = fmul float %50, %27 > %59 = fadd float %57, %58 > %60 = fmul float %51, %28 > %61 = fadd float %59, %60 > %62 = fmul float %52, %29 > %63 = fadd float %61, %62 > %64 = fmul float %49, %30 > %65 = fmul float %50, %31 > %66 = fadd float %64, %65 > %67 = fmul float %51, %32 > %68 = fadd float %66, %67 > %69 = fmul float %52, %33 > %70 = fadd float %68, %69 > %71 = bitcast i32 %11 to float > %72 = insertvalue <{ float, float, float }> undef, float %71, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %55, float %56) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %63, float %70, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %72 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) 
#0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..1] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 16, 0, 0} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[0], CONST[1][0] > 1: DP4 TEMP[1].x, IN[0], CONST[1][1] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: MOV TEMP[0].zw, IMM[1].yyxy > 4: MOV OUT[0], TEMP[0] > 5: END >radeonsi: Compiling shader 113 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28) > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %13) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = fmul float %27, %16 > %32 = fmul float %28, %17 > %33 = fadd float %31, %32 > %34 = fmul float %29, %18 > %35 = fadd float %33, %34 > %36 = fmul float %30, %19 > %37 = fadd float %35, %36 > %38 = fmul float %27, %20 > %39 = fmul float %28, %21 > %40 = fadd float %38, %39 > %41 = fmul float %29, %22 > %42 = fadd float %40, %41 > %43 = fmul float %30, %23 > %44 = fadd float %42, %43 > %45 = bitcast i32 %11 to float > %46 = insertvalue <{ float, float, float }> undef, float %45, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %37, float %44, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %46 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL 
OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..6], LOCAL >DCL ADDR[0] >IMM[0] INT32 {3, 4, 2, 29} >IMM[1] FLT32 { 0.1000, 2.1000, 3.1000, 1.1000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 6: UARL ADDR[0].x, TEMP[1].xxxx > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: MOV TEMP[1], CONST[1][ADDR[0].x] > 9: BFI TEMP[2].x, IMM[0].zzzz, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 10: BFI TEMP[3].x, IMM[0].yyyy, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 11: MOV TEMP[2].y, TEMP[3].xxxx > 12: BFI TEMP[3].x, IMM[3].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 13: MOV TEMP[2].z, TEMP[3].xxxx > 14: U2F TEMP[0].xyz, TEMP[2].xyzz > 15: ADD TEMP[2].xyz, TEMP[0].yyzz, IMM[1].yzxx > 16: ADD TEMP[0], TEMP[0].xxyy, IMM[1].xwxw > 17: F2U TEMP[0], TEMP[0] > 18: F2U TEMP[2].xyz, TEMP[2].xyzz > 19: UMUL TEMP[3].x, TEMP[2].zzzz, IMM[2].yyyy > 20: USHR TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 21: UARL ADDR[0].x, TEMP[3].xxxx > 22: UARL ADDR[0].x, TEMP[3].xxxx > 23: MOV TEMP[3], CONST[1][ADDR[0].x] > 24: UMUL TEMP[4].x, TEMP[0].zzzz, IMM[2].yyyy > 25: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy > 26: UARL ADDR[0].x, TEMP[5].xxxx > 27: MOV TEMP[4], CONST[1][ADDR[0].x] > 28: DP4 TEMP[4].x, IN[1], TEMP[4] > 29: UMUL TEMP[5].x, TEMP[0].wwww, IMM[2].yyyy > 30: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy > 31: UARL ADDR[0].x, TEMP[6].xxxx > 32: MOV TEMP[5], CONST[1][ADDR[0].x] > 33: DP4 TEMP[5].x, IN[1], TEMP[5] > 34: MOV TEMP[4].y, TEMP[5].xxxx > 35: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy > 36: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy > 37: UARL ADDR[0].x, TEMP[6].xxxx > 38: MOV TEMP[5], CONST[1][ADDR[0].x] > 39: DP4 TEMP[5].x, IN[1], TEMP[5] > 40: MOV TEMP[4].z, TEMP[5].xxxx > 41: UMUL TEMP[2].x, TEMP[2].yyyy, IMM[2].yyyy > 42: USHR TEMP[5].x, TEMP[2].xxxx, IMM[0].yyyy > 43: UARL ADDR[0].x, TEMP[5].xxxx > 44: MOV TEMP[2], CONST[1][ADDR[0].x] > 45: DP4 TEMP[2].x, IN[1], TEMP[2] > 46: MOV TEMP[4].w, TEMP[2].xxxx > 47: UMUL TEMP[2].x, TEMP[0].xxxx, IMM[2].yyyy > 48: USHR TEMP[5].x, TEMP[2].xxxx, IMM[0].yyyy > 49: UARL ADDR[0].x, TEMP[5].xxxx > 50: MOV TEMP[2], CONST[1][ADDR[0].x] > 51: DP4 TEMP[2].x, IN[1], TEMP[2] > 52: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 53: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].yyyy > 54: UARL ADDR[0].x, TEMP[5].xxxx > 55: MOV TEMP[0], CONST[1][ADDR[0].x] > 56: DP4 TEMP[0].x, IN[1], TEMP[0] > 57: MOV TEMP[2].y, TEMP[0].xxxx > 58: MOV TEMP[2].zw, IMM[4].yyxy > 59: MOV OUT[4], TEMP[4] > 60: MOV OUT[3], TEMP[3] > 61: MOV OUT[2], TEMP[1] > 62: MOV OUT[1], IN[0] > 63: MOV OUT[0], TEMP[2] > 64: END >radeonsi: Compiling shader 114 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa 
!0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = shl i32 %12, 3 > %32 = uitofp i32 %31 to float > %33 = fadd float %32, 0x3FB99999A0000000 > %34 = fptoui float %33 to i32 > %35 = shl i32 %34, 4 > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %35) > %37 = shl i32 %34, 4 > %38 = or i32 %37, 4 > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = shl i32 %34, 4 > %41 = or i32 %40, 8 > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %41) > %43 = shl i32 %34, 4 > %44 = or i32 %43, 12 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = shl i32 %12, 3 > %47 = or i32 %46, 2 > %48 = shl i32 %12, 3 > %49 = or i32 %48, 4 > %50 = shl i32 %12, 3 > %51 = or i32 %50, 1 > %52 = uitofp i32 %47 to float > %53 = uitofp i32 %49 to float > %54 = uitofp i32 %51 to float > %55 = fadd float %53, 0x4000CCCCC0000000 > %56 = fadd float %53, 0x4008CCCCC0000000 > %57 = fadd float %54, 0x3FB99999A0000000 > %58 = fadd float %52, 0x3FB99999A0000000 > %59 = fadd float %52, 0x3FF19999A0000000 > %60 = fadd float %53, 0x3FB99999A0000000 > %61 = fadd float %53, 0x3FF19999A0000000 > %62 = fptoui float %58 to i32 > %63 = fptoui float %59 to i32 > %64 = fptoui float %60 to i32 > %65 = fptoui float %61 to i32 > %66 = fptoui float %55 to i32 > %67 = fptoui float %56 to i32 > %68 = fptoui float %57 to i32 > %69 = shl i32 %68, 4 > %70 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %69) > %71 = shl i32 %68, 4 > %72 = or i32 %71, 4 > %73 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %72) > %74 = shl i32 %68, 4 > %75 = or i32 %74, 8 > %76 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %75) > %77 = shl i32 %68, 4 > %78 = or i32 %77, 12 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = shl i32 %64, 4 > %81 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %80) > %82 = shl i32 %64, 4 > %83 = or i32 %82, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %83) > %85 = shl i32 %64, 4 > %86 = or i32 %85, 8 > %87 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %86) > %88 = shl i32 %64, 4 > %89 = or i32 %88, 12 > %90 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %89) > %91 = fmul float %27, %81 > %92 = fmul float %28, %84 > %93 = fadd float %91, %92 > %94 = fmul float %29, %87 > %95 = fadd float %93, %94 > %96 = fmul float %30, %90 > %97 = fadd float %95, %96 > %98 = shl i32 %65, 4 > %99 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %98) > %100 = shl i32 %65, 4 > %101 = or i32 %100, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %101) > %103 = shl i32 %65, 4 > %104 = or i32 %103, 8 > %105 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %104) > %106 = shl i32 %65, 4 > %107 = or i32 %106, 12 > %108 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %107) > %109 = fmul float %27, %99 > %110 = fmul float %28, %102 > %111 = fadd float %109, %110 > %112 = fmul float %29, %105 > %113 = fadd float %111, %112 > %114 = fmul float %30, %108 > %115 = fadd float %113, %114 > %116 = shl i32 %66, 4 > %117 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %116) > %118 = shl i32 %66, 4 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %119) > %121 = shl i32 %66, 4 > %122 = or i32 %121, 8 > %123 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %122) > %124 = shl i32 %66, 4 > %125 = or i32 %124, 12 > %126 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %125) > %127 = fmul float %27, %117 > %128 = fmul float %28, %120 > %129 = fadd float %127, %128 > %130 = fmul float %29, %123 > %131 = fadd float %129, %130 > %132 = fmul float %30, %126 > %133 = fadd float %131, %132 > %134 = shl i32 %67, 4 > %135 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %134) > %136 = shl i32 %67, 4 > %137 = or i32 %136, 4 > %138 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %137) > %139 = shl i32 %67, 4 > %140 = or i32 %139, 8 > %141 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %140) > %142 = shl i32 %67, 4 > %143 = or i32 %142, 12 > %144 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %143) > %145 = fmul float %27, %135 > %146 = fmul float %28, %138 > %147 = fadd float %145, %146 > %148 = fmul float %29, %141 > %149 = fadd float %147, %148 > %150 = fmul float %30, %144 > %151 = fadd float %149, %150 > %152 = shl i32 %62, 4 > %153 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %152) > %154 = shl i32 %62, 4 > %155 = or i32 %154, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %155) > %157 = shl i32 %62, 4 > %158 = or i32 %157, 8 > %159 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %158) > %160 = shl i32 %62, 4 > %161 = or i32 %160, 12 > %162 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %161) > %163 = fmul float %27, %153 > %164 = fmul float %28, %156 > %165 = fadd float %163, %164 > %166 = fmul float %29, %159 > %167 = fadd float %165, %166 > %168 = fmul float %30, %162 > %169 = fadd float %167, %168 > %170 = shl i32 %63, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %170) > %172 = shl i32 %63, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %173) > %175 = shl i32 %63, 4 > %176 = or i32 %175, 8 > %177 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %176) > %178 = shl i32 %63, 4 > %179 = or i32 %178, 12 > %180 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %179) > %181 = fmul float %27, %171 > %182 = fmul float %28, %174 > %183 = fadd float %181, %182 > %184 = fmul float %29, %177 > %185 = fadd float %183, %184 > %186 = fmul float %30, %180 > %187 = fadd float %185, %186 > %188 = bitcast i32 %11 to float > %189 = insertvalue <{ float, float, float }> undef, float %188, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %36, float %39, float %42, float %45) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %70, float %73, float %76, float %79) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %97, float %115, float %133, float %151) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %169, float %187, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %189 >} > >; Function 
Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[3].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 2: MOV TEMP[1].xy, IN[3].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: ADD TEMP[1], TEMP[1], -TEMP[0] > 5: FMA TEMP[0], IN[0].xxxx, TEMP[1], TEMP[0] > 6: FMA TEMP[0], TEMP[0], IN[2], IN[1] > 7: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 8: MOV TEMP[1].w, TEMP[1].xxxx > 9: MOV TEMP[1].xyz, TEMP[0].xyzx > 10: MOV OUT[0], TEMP[1] > 11: END >radeonsi: Compiling shader 115 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 7 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 
3, i32 1, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %55 = bitcast float %53 to i32 > %56 = bitcast float %54 to i32 > %57 = insertelement <2 x i32> undef, i32 %55, i32 0 > %58 = insertelement <2 x i32> %57, i32 %56, i32 1 > %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %60 = extractelement <4 x float> %59, i32 0 > %61 = extractelement <4 x float> %59, i32 1 > %62 = extractelement <4 x float> %59, i32 2 > %63 = extractelement <4 x float> %59, i32 3 > %64 = bitcast float %51 to i32 > %65 = bitcast float %52 to i32 > %66 = insertelement <2 x i32> undef, i32 %64, i32 0 > %67 = insertelement <2 x i32> %66, i32 %65, i32 1 > %68 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %67, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = extractelement <4 x float> %68, i32 2 > %72 = extractelement <4 x float> %68, i32 3 > %73 = fsub float %69, %60 > %74 = fsub float %70, %61 > %75 = fsub float %71, %62 > %76 = fsub float %72, %63 > %77 = call float @llvm.fma.f32(float %41, float %73, float %60) > %78 = call float @llvm.fma.f32(float %41, float %74, float %61) > %79 = call float @llvm.fma.f32(float %41, float %75, float %62) > %80 = call float @llvm.fma.f32(float %41, float %76, float %63) > %81 = call float @llvm.fma.f32(float %77, float %47, float %43) > %82 = call float @llvm.fma.f32(float %78, float %48, float %44) > %83 = call float @llvm.fma.f32(float %79, float %49, float %45) > %84 = call float @llvm.fma.f32(float %80, float %50, float %46) > %85 = fmul float %84, %42 > %86 = bitcast float %5 to i32 > %87 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %86, 10 > %88 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87, float %81, 11 > %89 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %88, float %82, 12 > %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %89, float %83, 13 > %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %85, 14 > %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %21, 24 > ret <{ i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] INT32 {6, 2, 4, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 2.1000, 3.1000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: UMAD TEMP[0].x, SV[0].xxxx, IMM[0].xxxx, IMM[0].yyyy > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0], TEMP[0].xxxx, IMM[1] > 3: F2U TEMP[1], TEMP[0] > 4: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz > 6: UARL ADDR[0].x, TEMP[3].xxxx > 7: MOV TEMP[2], CONST[1][ADDR[0].x] > 8: DP4 TEMP[2].x, IN[1], TEMP[2] > 9: UMUL TEMP[3].x, TEMP[1].yyyy, IMM[2].yyyy > 10: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 11: UARL ADDR[0].x, TEMP[4].xxxx > 12: MOV TEMP[3], CONST[1][ADDR[0].x] > 13: DP4 TEMP[3].x, IN[1], TEMP[3] > 14: MOV TEMP[2].y, TEMP[3].xxxx > 15: UMUL TEMP[3].x, TEMP[1].zzzz, IMM[2].yyyy > 16: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 17: UARL ADDR[0].x, TEMP[4].xxxx > 18: MOV TEMP[3], CONST[1][ADDR[0].x] > 19: DP4 TEMP[3].x, IN[1], TEMP[3] > 20: MOV TEMP[2].z, TEMP[3].xxxx > 21: UMUL TEMP[1].x, TEMP[1].wwww, IMM[2].yyyy > 22: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 23: UARL ADDR[0].x, TEMP[3].xxxx > 24: MOV TEMP[1], CONST[1][ADDR[0].x] > 25: DP4 TEMP[1].x, IN[1], TEMP[1] > 26: MOV TEMP[2].w, TEMP[1].xxxx > 27: UMUL TEMP[1].x, SV[0].xxxx, IMM[0].xxxx > 28: U2F TEMP[0].x, TEMP[1].xxxx > 29: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 30: F2U TEMP[0].xy, TEMP[0].xyyy > 31: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 32: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 33: UARL ADDR[0].x, TEMP[3].xxxx > 34: MOV TEMP[1], CONST[1][ADDR[0].x] > 35: DP4 TEMP[1].x, IN[1], TEMP[1] > 36: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 37: USHR TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz > 38: UARL ADDR[0].x, TEMP[3].xxxx > 39: MOV TEMP[0], CONST[1][ADDR[0].x] > 40: DP4 TEMP[0].x, IN[1], TEMP[0] > 41: MOV TEMP[1].y, TEMP[0].xxxx > 42: MOV TEMP[1].zw, IMM[3].yyxy > 43: MOV OUT[2], TEMP[2] > 44: MOV OUT[1], IN[0] > 45: MOV OUT[0], TEMP[1] > 46: END >radeonsi: Compiling shader 116 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 
16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = mul i32 %12, 6 > %32 = add i32 %31, 2 > %33 = uitofp i32 %32 to float > %34 = fadd float %33, 0x3FB99999A0000000 > %35 = fadd float %33, 0x3FF19999A0000000 > %36 = fadd float %33, 0x4000CCCCC0000000 > %37 = fadd float %33, 0x4008CCCCC0000000 > %38 = fptoui float %34 to i32 > %39 = fptoui float %35 to i32 > %40 = fptoui float %36 to i32 > %41 = fptoui float %37 to i32 > %42 = shl i32 %38, 4 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = shl i32 %38, 4 > %45 = or i32 %44, 4 > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %45) > %47 = shl i32 %38, 4 > %48 = or i32 %47, 8 > %49 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %48) > %50 = shl i32 %38, 4 > %51 = or i32 %50, 12 > %52 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %51) > %53 = fmul float %27, %43 > %54 = fmul float %28, %46 > %55 = fadd float %53, %54 > %56 = fmul float %29, %49 > %57 = fadd float %55, %56 > %58 = fmul float %30, %52 > %59 = fadd float %57, %58 > %60 = shl i32 %39, 4 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = shl i32 %39, 4 > %63 = or i32 %62, 4 > %64 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %63) > %65 = shl i32 %39, 4 > %66 = or i32 %65, 8 > %67 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %66) > %68 = shl i32 %39, 4 > %69 = or i32 %68, 12 > %70 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %69) > %71 = fmul float %27, %61 > %72 = fmul float %28, %64 > %73 = fadd float %71, %72 > %74 = fmul float %29, %67 > %75 = fadd float %73, %74 > %76 = fmul float %30, %70 > %77 = fadd float %75, %76 > %78 = shl i32 %40, 4 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = shl i32 %40, 4 > %81 = or i32 %80, 4 > %82 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %81) > %83 = shl i32 %40, 4 > %84 = or i32 %83, 8 > %85 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %84) > %86 = shl i32 %40, 4 > %87 = or i32 %86, 12 > %88 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %87) > %89 = fmul float %27, %79 > %90 = fmul float %28, %82 > %91 = fadd float %89, %90 > %92 = fmul float %29, %85 > %93 = fadd float %91, %92 > %94 = fmul float %30, %88 > %95 = fadd float %93, %94 > %96 = shl i32 %41, 4 > %97 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %96) > %98 = shl i32 %41, 4 > %99 = or i32 %98, 4 > %100 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %99) > %101 = shl i32 %41, 4 > %102 = or i32 %101, 8 > %103 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %102) > %104 = shl i32 %41, 4 > %105 = or i32 %104, 12 > %106 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %105) > %107 = fmul float %27, %97 > %108 = fmul float %28, %100 > %109 = fadd 
float %107, %108 > %110 = fmul float %29, %103 > %111 = fadd float %109, %110 > %112 = fmul float %30, %106 > %113 = fadd float %111, %112 > %114 = mul i32 %12, 6 > %115 = uitofp i32 %114 to float > %116 = fadd float %115, 0x3FB99999A0000000 > %117 = fadd float %115, 0x3FF19999A0000000 > %118 = fptoui float %116 to i32 > %119 = fptoui float %117 to i32 > %120 = shl i32 %118, 4 > %121 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %120) > %122 = shl i32 %118, 4 > %123 = or i32 %122, 4 > %124 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %123) > %125 = shl i32 %118, 4 > %126 = or i32 %125, 8 > %127 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %126) > %128 = shl i32 %118, 4 > %129 = or i32 %128, 12 > %130 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %129) > %131 = fmul float %27, %121 > %132 = fmul float %28, %124 > %133 = fadd float %131, %132 > %134 = fmul float %29, %127 > %135 = fadd float %133, %134 > %136 = fmul float %30, %130 > %137 = fadd float %135, %136 > %138 = shl i32 %119, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %138) > %140 = shl i32 %119, 4 > %141 = or i32 %140, 4 > %142 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %141) > %143 = shl i32 %119, 4 > %144 = or i32 %143, 8 > %145 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %144) > %146 = shl i32 %119, 4 > %147 = or i32 %146, 12 > %148 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %147) > %149 = fmul float %27, %139 > %150 = fmul float %28, %142 > %151 = fadd float %149, %150 > %152 = fmul float %29, %145 > %153 = fadd float %151, %152 > %154 = fmul float %30, %148 > %155 = fadd float %153, %154 > %156 = bitcast i32 %11 to float > %157 = insertvalue <{ float, float, float }> undef, float %156, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %59, float %77, float %95, float %113) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %137, float %155, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %157 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[1].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 2: MOV TEMP[1].xy, IN[1].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: ADD TEMP[1], TEMP[1], -TEMP[0] > 5: FMA TEMP[0], IN[0].xxxx, TEMP[1], TEMP[0] > 6: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 7: MOV TEMP[1].w, TEMP[1].xxxx > 8: MOV TEMP[1].xyz, TEMP[0].xyzx > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 117 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 7 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %47 = bitcast float %45 to i32 > %48 = bitcast float %46 to i32 > %49 = insertelement <2 x i32> undef, i32 %47, i32 0 > %50 = insertelement <2 x i32> %49, i32 %48, i32 1 > %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = bitcast float %43 to i32 > %57 = bitcast float %44 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fsub float %61, %52 > %66 = fsub float %62, %53 > %67 = fsub float %63, %54 > %68 = fsub float %64, %55 > %69 = call float @llvm.fma.f32(float %41, float %65, float %52) > %70 = call float @llvm.fma.f32(float %41, float %66, float %53) > %71 = call float @llvm.fma.f32(float %41, float %67, float %54) > %72 = call float @llvm.fma.f32(float %41, float %68, float %55) > %73 = fmul float %72, %42 > %74 = bitcast float %5 to i32 > %75 = insertvalue <{ i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %74, 10 > %76 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %75, float %69, 11 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %76, float %70, 12 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %71, 13 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %73, 14 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 96, 4, 31} >IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {2, 3, 0, 0} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UADD TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx > 5: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 6: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: UARL ADDR[0].x, TEMP[1].xxxx > 9: MOV TEMP[1], CONST[1][ADDR[0].x] > 10: BFI TEMP[2].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 11: U2F TEMP[0].x, TEMP[2].xxxx > 12: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 13: F2U TEMP[2].x, TEMP[0].xxxx > 14: UADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx > 15: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 16: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 17: UARL ADDR[0].x, TEMP[2].xxxx > 18: UARL ADDR[0].x, TEMP[2].xxxx > 19: MOV TEMP[2], CONST[1][ADDR[0].x] > 20: U2F TEMP[0].x, SV[0].xxxx > 21: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 22: F2U TEMP[0].x, TEMP[0].xxxx > 23: SHL TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx > 24: UMUL TEMP[3].x, TEMP[0].xxxx, IMM[2].yyyy > 25: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 26: UARL ADDR[0].x, TEMP[4].xxxx > 27: MOV TEMP[3], CONST[1][ADDR[0].x] > 28: DP4 TEMP[3].x, IN[2], TEMP[3] > 29: UADD 
TEMP[4].x, IMM[0].xxxx, TEMP[0].xxxx > 30: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 31: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 32: UARL ADDR[0].x, TEMP[5].xxxx > 33: MOV TEMP[4], CONST[1][ADDR[0].x] > 34: DP4 TEMP[4].x, IN[2], TEMP[4] > 35: MOV TEMP[3].y, TEMP[4].xxxx > 36: UADD TEMP[4].x, IMM[3].xxxx, TEMP[0].xxxx > 37: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 38: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 39: UARL ADDR[0].x, TEMP[5].xxxx > 40: MOV TEMP[4], CONST[1][ADDR[0].x] > 41: DP4 TEMP[4].x, IN[2], TEMP[4] > 42: MOV TEMP[3].z, TEMP[4].xxxx > 43: UADD TEMP[0].x, IMM[3].yyyy, TEMP[0].xxxx > 44: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 45: USHR TEMP[4].x, TEMP[0].xxxx, IMM[0].zzzz > 46: UARL ADDR[0].x, TEMP[4].xxxx > 47: MOV TEMP[0], CONST[1][ADDR[0].x] > 48: DP4 TEMP[0].x, IN[2], TEMP[0] > 49: MOV TEMP[3].w, TEMP[0].xxxx > 50: MOV OUT[4], TEMP[2] > 51: MOV OUT[3], TEMP[1] > 52: MOV OUT[2], IN[1] > 53: MOV OUT[1], IN[0] > 54: MOV OUT[0], TEMP[3] > 55: END >radeonsi: Compiling shader 118 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = shl i32 %12, 1 > %40 = uitofp i32 %39 to float > %41 = fadd float %40, 0x3FB99999A0000000 > %42 = fptoui float %41 to i32 > %43 = shl i32 %42, 4 > %44 = add i32 %43, 1536 > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %44) > %46 = add i32 %43, 1540 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = add i32 %43, 1544 > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %48) > %50 = add i32 %43, 1548 > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %50) > %52 = shl i32 %12, 1 > %53 = or i32 %52, 1 > %54 = uitofp i32 %53 to float > %55 = fadd float %54, 0x3FB99999A0000000 > %56 = fptoui 
float %55 to i32 > %57 = shl i32 %56, 4 > %58 = add i32 %57, 1536 > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %58) > %60 = add i32 %57, 1540 > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %60) > %62 = add i32 %57, 1544 > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %62) > %64 = add i32 %57, 1548 > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %64) > %66 = uitofp i32 %12 to float > %67 = fadd float %66, 0x3FB99999A0000000 > %68 = fptoui float %67 to i32 > %69 = shl i32 %68, 6 > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %69) > %71 = shl i32 %68, 6 > %72 = or i32 %71, 4 > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %72) > %74 = shl i32 %68, 6 > %75 = or i32 %74, 8 > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %75) > %77 = shl i32 %68, 6 > %78 = or i32 %77, 12 > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %78) > %80 = fmul float %35, %70 > %81 = fmul float %36, %73 > %82 = fadd float %80, %81 > %83 = fmul float %37, %76 > %84 = fadd float %82, %83 > %85 = fmul float %38, %79 > %86 = fadd float %84, %85 > %87 = shl i32 %68, 2 > %88 = and i32 %87, 268435452 > %89 = or i32 %88, 1 > %90 = shl nuw i32 %89, 4 > %91 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %90) > %92 = shl nuw i32 %89, 4 > %93 = or i32 %92, 4 > %94 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %93) > %95 = shl nuw i32 %89, 4 > %96 = or i32 %95, 8 > %97 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %96) > %98 = shl nuw i32 %89, 4 > %99 = or i32 %98, 12 > %100 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %99) > %101 = fmul float %35, %91 > %102 = fmul float %36, %94 > %103 = fadd float %101, %102 > %104 = fmul float %37, %97 > %105 = fadd float %103, %104 > %106 = fmul float %38, %100 > %107 = fadd float %105, %106 > %108 = shl i32 %68, 2 > %109 = and i32 %108, 268435452 > %110 = or i32 %109, 2 > %111 = shl nuw i32 %110, 4 > %112 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %111) > %113 = shl nuw i32 %110, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %114) > %116 = shl nuw i32 %110, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %117) > %119 = shl nuw i32 %110, 4 > %120 = or i32 %119, 12 > %121 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %120) > %122 = fmul float %35, %112 > %123 = fmul float %36, %115 > %124 = fadd float %122, %123 > %125 = fmul float %37, %118 > %126 = fadd float %124, %125 > %127 = fmul float %38, %121 > %128 = fadd float %126, %127 > %129 = shl i32 %68, 2 > %130 = and i32 %129, 268435452 > %131 = or i32 %130, 3 > %132 = shl nuw i32 %131, 4 > %133 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %132) > %134 = shl nuw i32 %131, 4 > %135 = or i32 %134, 4 > %136 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %135) > %137 = shl nuw i32 %131, 4 > %138 = or i32 %137, 8 > %139 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %138) > %140 = shl nuw i32 %131, 4 > %141 = or i32 %140, 12 > %142 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %141) > %143 = fmul float %35, %133 > %144 = fmul float %36, %136 > %145 = fadd float %143, %144 > %146 = fmul float %37, %139 > %147 = fadd float %145, %146 > %148 = fmul float %38, %142 > %149 = fadd float %147, %148 > %150 = bitcast i32 %11 to float > %151 = insertvalue <{ float, float, float }> undef, float %150, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 
15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %45, float %47, float %49, float %51) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %59, float %61, float %63, float %65) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %86, float %107, float %128, float %149) > ret <{ float, float, float }> %151 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL TEMP[0..1], LOCAL > 0: FMA TEMP[0], IN[0], IN[3], IN[2] > 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww > 2: MOV TEMP[1].w, TEMP[1].xxxx > 3: MOV TEMP[1].xyz, TEMP[0].xyzx > 4: MOV OUT[0], TEMP[1] > 5: END >radeonsi: Compiling shader 119 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %36 = call float @llvm.fma.f32(float %23, float %32, float %28) > %37 = call float @llvm.fma.f32(float %24, float %33, float %29) > %38 = call float @llvm.fma.f32(float %25, float %34, float %30) > %39 = call float @llvm.fma.f32(float %26, float %35, float %31) > %40 = fmul float %39, %27 > %41 = bitcast float %5 to i32 > %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %41, 10 > %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %36, 11 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float %37, 12 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %38, 13 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 14 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 4, 30, 1} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 6: UARL ADDR[0].x, TEMP[1].xxxx > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: MOV TEMP[1], CONST[1][ADDR[0].x] > 9: BFI TEMP[2].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].zzzz > 10: BFI TEMP[3].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].zzzz > 11: MOV TEMP[2].y, TEMP[3].xxxx > 12: BFI TEMP[3].x, IMM[0].wwww, SV[0].xxxx, IMM[0].xxxx, IMM[0].zzzz > 13: MOV TEMP[2].z, TEMP[3].xxxx > 14: U2F TEMP[0].xyz, TEMP[2].xyzz > 15: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xyxx > 16: F2U TEMP[0].xyz, TEMP[0].xyzz > 17: UMUL TEMP[2].x, TEMP[0].zzzz, IMM[2].yyyy > 18: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 19: UARL ADDR[0].x, TEMP[2].xxxx > 20: UARL ADDR[0].x, TEMP[2].xxxx > 21: MOV TEMP[2], CONST[1][ADDR[0].x] > 22: UMUL TEMP[3].x, TEMP[0].xxxx, IMM[2].yyyy > 23: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy > 24: UARL ADDR[0].x, TEMP[4].xxxx > 25: MOV TEMP[3], CONST[1][ADDR[0].x] > 26: DP4 TEMP[3].x, IN[2], TEMP[3] > 27: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 28: USHR TEMP[4].x, TEMP[0].xxxx, IMM[0].yyyy > 29: UARL ADDR[0].x, TEMP[4].xxxx > 30: MOV TEMP[0], CONST[1][ADDR[0].x] > 31: DP4 TEMP[0].x, IN[2], TEMP[0] > 32: MOV TEMP[3].y, TEMP[0].xxxx > 33: MOV TEMP[3].zw, IMM[1].wwzw > 34: MOV OUT[4], TEMP[2] > 35: MOV OUT[3], TEMP[1] > 36: MOV OUT[2], IN[1] 
> 37: MOV OUT[1], IN[0] > 38: MOV OUT[0], TEMP[3] > 39: END >radeonsi: Compiling shader 120 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = shl i32 %12, 2 > %40 = uitofp i32 %39 to float > %41 = fadd float %40, 0x3FB99999A0000000 > %42 = fptoui float %41 to i32 > %43 = shl i32 %42, 4 > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %43) > %45 = shl i32 %42, 4 > %46 = or i32 %45, 4 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = shl i32 %42, 4 > %49 = or i32 %48, 8 > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %49) > %51 = shl i32 %42, 4 > %52 = or i32 %51, 12 > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %52) > %54 = shl i32 %12, 2 > %55 = or i32 %54, 2 > %56 = shl i32 %12, 2 > %57 = or i32 %56, 2 > %58 = shl i32 %12, 2 > %59 = or i32 %58, 1 > %60 = uitofp i32 %55 to float > %61 = uitofp i32 %57 to float > %62 = uitofp i32 %59 to float > %63 = fadd float %60, 0x3FB99999A0000000 > %64 = fadd float %61, 0x3FF19999A0000000 > %65 = fadd float %62, 0x3FB99999A0000000 > %66 = fptoui float %63 to i32 > %67 = fptoui float %64 to i32 > %68 = fptoui float %65 to i32 > %69 = shl i32 %68, 4 > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %69) > %71 = shl i32 %68, 4 > %72 = or i32 %71, 4 > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %72) > %74 = shl i32 %68, 4 > %75 = or i32 %74, 8 > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %75) > %77 = shl i32 %68, 4 > %78 = or i32 %77, 12 > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %78) > %80 = shl i32 %66, 4 > %81 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %80) > %82 = shl i32 %66, 4 > %83 = or i32 %82, 4 > %84 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 %83) > %85 = shl i32 %66, 4 > %86 = or i32 %85, 8 > %87 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %86) > %88 = shl i32 %66, 4 > %89 = or i32 %88, 12 > %90 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %89) > %91 = fmul float %35, %81 > %92 = fmul float %36, %84 > %93 = fadd float %91, %92 > %94 = fmul float %37, %87 > %95 = fadd float %93, %94 > %96 = fmul float %38, %90 > %97 = fadd float %95, %96 > %98 = shl i32 %67, 4 > %99 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %98) > %100 = shl i32 %67, 4 > %101 = or i32 %100, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %101) > %103 = shl i32 %67, 4 > %104 = or i32 %103, 8 > %105 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %104) > %106 = shl i32 %67, 4 > %107 = or i32 %106, 12 > %108 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %107) > %109 = fmul float %35, %99 > %110 = fmul float %36, %102 > %111 = fadd float %109, %110 > %112 = fmul float %37, %105 > %113 = fadd float %111, %112 > %114 = fmul float %38, %108 > %115 = fadd float %113, %114 > %116 = bitcast i32 %11 to float > %117 = insertvalue <{ float, float, float }> undef, float %116, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %44, float %47, float %50, float %53) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %70, float %73, float %76, float %79) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %115, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %117 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..2], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 4, 0, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 3: F2U TEMP[0].xy, TEMP[0].xyyy > 4: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 5: USHR TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy > 6: UARL ADDR[0].x, TEMP[2].xxxx > 7: MOV TEMP[1], CONST[1][ADDR[0].x] > 8: DP4 TEMP[1].x, IN[2], TEMP[1] > 9: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 10: USHR TEMP[2].x, TEMP[0].xxxx, IMM[0].yyyy > 11: UARL ADDR[0].x, TEMP[2].xxxx > 12: MOV TEMP[0], CONST[1][ADDR[0].x] > 13: DP4 TEMP[0].x, IN[2], TEMP[0] > 14: MOV TEMP[1].y, TEMP[0].xxxx > 15: MOV TEMP[1].zw, IMM[1].wwzw > 16: MOV OUT[2], IN[1] > 17: MOV OUT[1], IN[0] > 18: MOV OUT[0], TEMP[1] > 19: END >radeonsi: Compiling shader 121 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x 
<16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = shl i32 %12, 1 > %40 = uitofp i32 %39 to float > %41 = fadd float %40, 0x3FB99999A0000000 > %42 = fadd float %40, 0x3FF19999A0000000 > %43 = fptoui float %41 to i32 > %44 = fptoui float %42 to i32 > %45 = shl i32 %43, 4 > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %45) > %47 = shl i32 %43, 4 > %48 = or i32 %47, 4 > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %48) > %50 = shl i32 %43, 4 > %51 = or i32 %50, 8 > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %51) > %53 = shl i32 %43, 4 > %54 = or i32 %53, 12 > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %54) > %56 = fmul float %35, %46 > %57 = fmul float %36, %49 > %58 = fadd float %56, %57 > %59 = fmul float %37, %52 > %60 = fadd float %58, %59 > %61 = fmul float %38, %55 > %62 = fadd float %60, %61 > %63 = shl i32 %44, 4 > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %63) > %65 = shl i32 %44, 4 > %66 = or i32 %65, 4 > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %66) > %68 = shl i32 %44, 4 > %69 = or i32 %68, 8 > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %69) > %71 = shl i32 %44, 4 > %72 = or i32 %71, 12 > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %72) > %74 = fmul float %35, %64 > %75 = fmul float %36, %67 > %76 = fadd float %74, %75 > %77 = fmul float %37, %70 > %78 = fadd float %76, %77 > %79 = fmul float %38, %73 > %80 = fadd float %78, %79 > %81 = bitcast i32 %11 to float > %82 = insertvalue <{ float, float, float }> undef, float %81, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %80, float 0.000000e+00, float 
1.000000e+00) > ret <{ float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL TEMP[0], LOCAL > 0: MUL TEMP[0].x, IN[0].wwww, IN[1].wwww > 1: MOV TEMP[0].w, TEMP[0].xxxx > 2: MOV TEMP[0].xyz, IN[0].xyzx > 3: MOV OUT[0], TEMP[0] > 4: END >radeonsi: Compiling shader 122 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %28 = fmul float %26, %27 > %29 = bitcast float %5 to i32 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %29, 10 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %23, 11 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %24, 12 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %25, 13 > %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float %28, 14 > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %34, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >attributes #0 
= { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] INT32 {6, 1, 4, 2} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: UMUL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: MOV TEMP[0].x, TEMP[0].xxxx > 2: UMAD TEMP[1].x, SV[0].xxxx, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].y, TEMP[1].xxxx > 4: U2F TEMP[0].xy, TEMP[0].xyyy > 5: ADD TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx > 6: F2U TEMP[1].xy, TEMP[0].xyyy > 7: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy > 8: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 9: UARL ADDR[0].x, TEMP[2].xxxx > 10: UARL ADDR[0].x, TEMP[2].xxxx > 11: MOV TEMP[2], CONST[1][ADDR[0].x] > 12: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 13: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 14: UARL ADDR[0].x, TEMP[1].xxxx > 15: UARL ADDR[0].x, TEMP[1].xxxx > 16: MOV TEMP[1], CONST[1][ADDR[0].x] > 17: UMAD TEMP[3], SV[0].xxxx, IMM[0].xxxx, IMM[0].wwzz > 18: U2F TEMP[3], TEMP[3] > 19: ADD TEMP[0], TEMP[3], IMM[1].xyxy > 20: F2U TEMP[0], TEMP[0] > 21: UMUL TEMP[3].x, TEMP[0].zzzz, IMM[2].yyyy > 22: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 23: UARL ADDR[0].x, TEMP[4].xxxx > 24: MOV TEMP[3], CONST[1][ADDR[0].x] > 25: DP4 TEMP[3].x, IN[1], TEMP[3] > 26: UMUL TEMP[4].x, TEMP[0].wwww, IMM[2].yyyy > 27: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 28: UARL ADDR[0].x, TEMP[5].xxxx > 29: MOV TEMP[4], CONST[1][ADDR[0].x] > 30: DP4 TEMP[4].x, IN[1], TEMP[4] > 31: MOV TEMP[3].y, TEMP[4].xxxx > 32: UMUL TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 33: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 34: UARL ADDR[0].x, TEMP[5].xxxx > 35: MOV TEMP[4], CONST[1][ADDR[0].x] > 36: DP4 TEMP[4].x, IN[1], TEMP[4] > 37: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 38: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].zzzz > 39: UARL ADDR[0].x, TEMP[5].xxxx > 40: MOV TEMP[0], CONST[1][ADDR[0].x] > 41: DP4 TEMP[0].x, IN[1], TEMP[0] > 42: MOV TEMP[4].y, TEMP[0].xxxx > 43: MOV TEMP[4].zw, IMM[1].wwzw > 44: MOV OUT[4], TEMP[3] > 45: MOV OUT[2], TEMP[1] > 46: MOV OUT[3], TEMP[2] > 47: MOV OUT[1], IN[0] > 48: MOV OUT[0], TEMP[4] > 49: END >radeonsi: Compiling shader 123 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = mul i32 %12, 6 > %32 = mul i32 %12, 6 > %33 = or i32 %32, 1 > %34 = uitofp i32 %31 to float > %35 = uitofp i32 %33 to float > %36 = fadd float %34, 0x3FB99999A0000000 > %37 = fadd float %35, 0x3FB99999A0000000 > %38 = fptoui float %36 to i32 > %39 = fptoui float %37 to i32 > %40 = shl i32 %39, 4 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = shl i32 %39, 4 > %43 = or i32 %42, 4 > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %43) > %45 = shl i32 %39, 4 > %46 = or i32 %45, 8 > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %46) > %48 = shl i32 %39, 4 > %49 = or i32 %48, 12 > %50 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %49) > %51 = shl i32 %38, 4 > %52 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %51) > %53 = shl i32 %38, 4 > %54 = or i32 %53, 4 > %55 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %54) > %56 = shl i32 %38, 4 > %57 = or i32 %56, 8 > %58 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %57) > %59 = shl i32 %38, 4 > %60 = or i32 %59, 12 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = mul i32 %12, 6 > %63 = add i32 %62, 2 > %64 = mul i32 %12, 6 > %65 = add i32 %64, 2 > %66 = mul i32 %12, 6 > %67 = add i32 %66, 4 > %68 = mul i32 %12, 6 > %69 = add i32 %68, 4 > %70 = uitofp i32 %63 to float > %71 = uitofp i32 %65 to float > %72 = uitofp i32 %67 to float > %73 = uitofp i32 %69 to float > %74 = fadd float %70, 0x3FB99999A0000000 > %75 = fadd float %71, 0x3FF19999A0000000 > %76 = fadd float %72, 0x3FB99999A0000000 > %77 = fadd float %73, 0x3FF19999A0000000 > %78 = fptoui float %74 to i32 > %79 = fptoui float %75 to i32 > %80 = fptoui float %76 to i32 > %81 = fptoui float %77 to i32 > %82 = shl i32 %80, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %82) > %84 = shl i32 %80, 4 > %85 = or i32 %84, 4 > %86 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %85) > %87 = shl i32 %80, 4 > %88 = or i32 %87, 8 > %89 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %88) > %90 = shl i32 %80, 4 > %91 = or i32 %90, 12 > %92 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %91) > %93 = fmul float %27, %83 > %94 = fmul float %28, %86 > %95 = fadd float %93, %94 > %96 = fmul float %29, %89 > %97 = fadd float %95, %96 > %98 = fmul float %30, %92 > %99 = fadd float %97, %98 > %100 = shl i32 %81, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %100) > %102 = shl i32 %81, 4 > %103 = or i32 %102, 4 > %104 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %103) > %105 = shl i32 %81, 4 > %106 = or i32 %105, 8 > %107 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %106) > %108 = shl i32 %81, 4 > %109 = or i32 %108, 12 > %110 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %109) > %111 = fmul float %27, %101 > %112 = fmul float %28, %104 > %113 = fadd float %111, %112 > %114 = fmul float %29, %107 > %115 = fadd float %113, %114 > %116 = fmul float %30, %110 > %117 = fadd float %115, %116 > %118 = shl i32 %78, 4 > %119 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %118) > %120 = shl i32 %78, 4 > %121 = or i32 %120, 4 > %122 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %121) > %123 = shl i32 %78, 4 > %124 = or i32 %123, 8 > %125 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %124) > %126 = shl i32 %78, 4 > %127 = or i32 %126, 12 > %128 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %127) > %129 = fmul float %27, %119 > %130 = fmul float %28, %122 > %131 = fadd float %129, %130 > %132 = fmul float %29, %125 > %133 = fadd float %131, %132 > %134 = fmul float %30, %128 > %135 = fadd float %133, %134 > %136 = shl i32 %79, 4 > %137 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %136) > %138 = shl i32 %79, 4 > %139 = or i32 %138, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %139) > %141 = shl i32 %79, 4 > %142 = or i32 %141, 8 > %143 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %142) > %144 = shl i32 %79, 4 > %145 = or i32 %144, 12 > %146 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %145) > %147 = fmul float %27, %137 > %148 = fmul float %28, %140 > %149 = fadd float %147, %148 > %150 = fmul float %29, %143 > %151 = fadd float %149, %150 > %152 = fmul float %30, %146 > %153 = fadd float %151, %152 > %154 = bitcast i32 %11 to float > %155 = insertvalue <{ float, float, float }> undef, float %154, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %52, float %55, float %58, float %61) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %41, float %44, float %47, float %50) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %99, float %117, float %89, float %92) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %135, float %153, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %155 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[3].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: FMA TEMP[0], TEMP[0], IN[2], IN[1] > 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 4: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 5: MOV TEMP[0].w, TEMP[1].xxxx > 6: MOV OUT[0], TEMP[0] > 7: END >radeonsi: Compiling shader 124 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x 
i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %43 = bitcast float %41 to i32 > %44 = bitcast float %42 to i32 > %45 = insertelement <2 x i32> undef, i32 %43, i32 0 > %46 = insertelement <2 x i32> %45, i32 %44, i32 1 > %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = extractelement <4 x float> %47, i32 3 > %52 = call float @llvm.fma.f32(float %48, float %37, float %33) > %53 = call float @llvm.fma.f32(float %49, float %38, float %34) > %54 = call float @llvm.fma.f32(float %50, float %39, float %35) > %55 = call float @llvm.fma.f32(float %51, float %40, float %36) > %56 = fmul float %55, %32 > %57 = fmul float %56, %52 > %58 = fmul float %56, %53 > %59 = fmul float %56, %54 > %60 = bitcast float %5 to i32 > %61 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %60, 10 > %62 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %61, float %57, 11 > %63 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %62, float %58, 12 > %64 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %63, float %59, 13 > %65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %64, float %56, 14 > %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float }> %65, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..6], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 96, 4, 30} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UADD TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx > 5: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 6: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: UARL ADDR[0].x, TEMP[1].xxxx > 9: MOV TEMP[1], CONST[1][ADDR[0].x] > 10: BFI TEMP[2].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 11: BFI TEMP[3].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 12: MOV TEMP[2].y, TEMP[3].xxxx > 13: BFI TEMP[3].x, IMM[3].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].wwww > 14: MOV TEMP[2].z, TEMP[3].xxxx > 15: U2F TEMP[0].xyz, TEMP[2].xyzz > 16: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xyxx > 17: F2U TEMP[2].xyz, TEMP[0].xyzz > 18: UADD TEMP[3].x, IMM[0].yyyy, TEMP[2].zzzz > 19: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 20: USHR TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz > 21: UARL ADDR[0].x, TEMP[3].xxxx > 22: UARL ADDR[0].x, TEMP[3].xxxx > 23: MOV TEMP[3], CONST[1][ADDR[0].x] > 24: UADD TEMP[4].x, IMM[0].yyyy, TEMP[2].xxxx > 25: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 26: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 27: UARL ADDR[0].x, TEMP[5].xxxx > 28: MOV TEMP[4], CONST[1][ADDR[0].x] > 29: DP4 TEMP[4].x, IN[1], TEMP[4] > 30: UADD TEMP[2].x, IMM[0].yyyy, TEMP[2].yyyy > 31: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 32: USHR TEMP[5].x, TEMP[2].xxxx, IMM[0].zzzz > 33: UARL ADDR[0].x, TEMP[5].xxxx > 34: MOV TEMP[2], CONST[1][ADDR[0].x] > 35: DP4 TEMP[2].x, IN[1], TEMP[2] > 36: MOV TEMP[4].y, TEMP[2].xxxx > 37: U2F TEMP[0].x, SV[0].xxxx > 38: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 39: F2U TEMP[0].x, TEMP[0].xxxx > 40: SHL TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 41: UMUL TEMP[2].x, TEMP[0].xxxx, IMM[2].yyyy > 42: USHR TEMP[5].x, TEMP[2].xxxx, IMM[0].zzzz > 43: UARL ADDR[0].x, TEMP[5].xxxx > 44: MOV TEMP[2], CONST[1][ADDR[0].x] > 45: DP4 TEMP[2].x, IN[1], TEMP[2] > 46: UADD TEMP[5].x, IMM[3].xxxx, TEMP[0].xxxx > 47: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 48: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].zzzz > 49: UARL ADDR[0].x, TEMP[6].xxxx > 50: MOV TEMP[5], CONST[1][ADDR[0].x] > 51: DP4 TEMP[5].x, IN[1], TEMP[5] > 52: MOV TEMP[2].y, TEMP[5].xxxx > 53: UADD TEMP[5].x, IMM[0].xxxx, TEMP[0].xxxx > 54: UMUL TEMP[5].x, 
TEMP[5].xxxx, IMM[2].yyyy > 55: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].zzzz > 56: UARL ADDR[0].x, TEMP[6].xxxx > 57: MOV TEMP[5], CONST[1][ADDR[0].x] > 58: DP4 TEMP[5].x, IN[1], TEMP[5] > 59: MOV TEMP[2].z, TEMP[5].xxxx > 60: UADD TEMP[0].x, IMM[3].yyyy, TEMP[0].xxxx > 61: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 62: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].zzzz > 63: UARL ADDR[0].x, TEMP[5].xxxx > 64: MOV TEMP[0], CONST[1][ADDR[0].x] > 65: DP4 TEMP[0].x, IN[1], TEMP[0] > 66: MOV TEMP[2].w, TEMP[0].xxxx > 67: MOV OUT[4], TEMP[4] > 68: MOV OUT[3], TEMP[3] > 69: MOV OUT[2], TEMP[1] > 70: MOV OUT[1], IN[0] > 71: MOV OUT[0], TEMP[2] > 72: END >radeonsi: Compiling shader 125 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = shl i32 %12, 2 > %32 = uitofp i32 %31 to float > %33 = fadd float %32, 0x3FB99999A0000000 > %34 = fptoui float %33 to i32 > %35 = shl i32 %34, 4 > %36 = add i32 %35, 1536 > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %36) > %38 = add i32 %35, 1540 > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = add i32 %35, 1544 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = add i32 %35, 1548 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = shl i32 %12, 2 > %45 = or i32 %44, 2 > %46 = shl i32 %12, 2 > %47 = or i32 %46, 2 > %48 = shl i32 %12, 2 > %49 = or i32 %48, 1 > %50 = uitofp i32 %45 to float > %51 = uitofp i32 %47 to float > %52 = uitofp i32 %49 to float > %53 = fadd float %50, 0x3FB99999A0000000 > %54 = fadd float %51, 0x3FF19999A0000000 > %55 = fadd float %52, 0x3FB99999A0000000 > %56 = fptoui float %53 to i32 > %57 = fptoui float %54 to i32 > %58 = fptoui float %55 to i32 > %59 = shl i32 %58, 4 > %60 = add i32 %59, 1536 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = add i32 %59, 1540 > %63 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %62) > %64 = add i32 %59, 1544 > %65 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %64) > %66 = add i32 %59, 1548 > %67 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %66) > %68 = shl i32 %56, 4 > %69 = add i32 %68, 1536 > %70 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %69) > %71 = add i32 %68, 1540 > %72 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %71) > %73 = add i32 %68, 1544 > %74 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %73) > %75 = add i32 %68, 1548 > %76 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %75) > %77 = fmul float %27, %70 > %78 = fmul float %28, %72 > %79 = fadd float %77, %78 > %80 = fmul float %29, %74 > %81 = fadd float %79, %80 > %82 = fmul float %30, %76 > %83 = fadd float %81, %82 > %84 = shl i32 %57, 4 > %85 = add i32 %84, 1536 > %86 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %85) > %87 = add i32 %84, 1540 > %88 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %87) > %89 = add i32 %84, 1544 > %90 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %89) > %91 = add i32 %84, 1548 > %92 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %91) > %93 = fmul float %27, %86 > %94 = fmul float %28, %88 > %95 = fadd float %93, %94 > %96 = fmul float %29, %90 > %97 = fadd float %95, %96 > %98 = fmul float %30, %92 > %99 = fadd float %97, %98 > %100 = uitofp i32 %12 to float > %101 = fadd float %100, 0x3FB99999A0000000 > %102 = fptoui float %101 to i32 > %103 = shl i32 %102, 6 > %104 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %103) > %105 = shl i32 %102, 6 > %106 = or i32 %105, 4 > %107 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %106) > %108 = shl i32 %102, 6 > %109 = or i32 %108, 8 > %110 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %109) > %111 = shl i32 %102, 6 > %112 = or i32 %111, 12 > %113 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %112) > %114 = fmul float %27, %104 > %115 = fmul float %28, %107 > %116 = fadd float %114, %115 > %117 = fmul float %29, %110 > %118 = fadd float %116, %117 > %119 = fmul float %30, %113 > %120 = fadd float %118, %119 > %121 = shl i32 %102, 2 > %122 = and i32 %121, 268435452 > %123 = or i32 %122, 1 > %124 = shl nuw i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %124) > %126 = shl nuw i32 %123, 4 > %127 = or i32 %126, 4 > %128 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %127) > %129 = shl nuw i32 %123, 4 > %130 = or i32 %129, 8 > %131 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %130) > %132 = shl nuw i32 %123, 4 > %133 = or i32 %132, 12 > %134 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %133) > %135 = fmul float %27, %125 > %136 = fmul float %28, %128 > %137 = fadd float %135, %136 > %138 = fmul float %29, %131 > %139 = fadd float %137, %138 > %140 = fmul float %30, %134 > %141 = fadd float %139, %140 > %142 = shl i32 %102, 2 > %143 = and i32 %142, 268435452 > %144 = or i32 %143, 2 > %145 = shl nuw i32 %144, 4 > %146 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %145) > %147 = shl nuw i32 %144, 4 > %148 = or i32 %147, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %148) > %150 = shl nuw i32 %144, 4 > %151 = or i32 %150, 8 > %152 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %151) > %153 = shl nuw i32 %144, 4 > %154 = or i32 %153, 12 > %155 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %154) > %156 = fmul float %27, %146 > %157 = fmul float %28, %149 > %158 = fadd float %156, %157 > %159 = fmul float %29, %152 > %160 = fadd float %158, %159 > %161 = fmul float %30, %155 > %162 = fadd float %160, %161 > %163 = shl i32 %102, 2 > %164 = and i32 %163, 268435452 > %165 = or i32 %164, 3 > %166 = shl nuw i32 %165, 4 > %167 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %166) > %168 = shl nuw i32 %165, 4 > %169 = or i32 %168, 
4 > %170 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %169) > %171 = shl nuw i32 %165, 4 > %172 = or i32 %171, 8 > %173 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %172) > %174 = shl nuw i32 %165, 4 > %175 = or i32 %174, 12 > %176 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %175) > %177 = fmul float %27, %167 > %178 = fmul float %28, %170 > %179 = fadd float %177, %178 > %180 = fmul float %29, %173 > %181 = fadd float %179, %180 > %182 = fmul float %30, %176 > %183 = fadd float %181, %182 > %184 = bitcast i32 %11 to float > %185 = insertvalue <{ float, float, float }> undef, float %184, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %37, float %39, float %41, float %43) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %61, float %63, float %65, float %67) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %83, float %99, float %74, float %76) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %120, float %141, float %162, float %183) > ret <{ float, float, float }> %185 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[3].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: FMA TEMP[0], TEMP[0], IN[2], IN[1] > 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 4: MOV TEMP[1].w, TEMP[1].xxxx > 5: MOV TEMP[1].xyz, TEMP[0].xyzx > 6: MOV OUT[0], TEMP[1] > 7: END >radeonsi: Compiling shader 126 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > 
%31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %43 = bitcast float %41 to i32 > %44 = bitcast float %42 to i32 > %45 = insertelement <2 x i32> undef, i32 %43, i32 0 > %46 = insertelement <2 x i32> %45, i32 %44, i32 1 > %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = extractelement <4 x float> %47, i32 3 > %52 = call float @llvm.fma.f32(float %48, float %37, float %33) > %53 = call float @llvm.fma.f32(float %49, float %38, float %34) > %54 = call float @llvm.fma.f32(float %50, float %39, float %35) > %55 = call float @llvm.fma.f32(float %51, float %40, float %36) > %56 = fmul float %55, %32 > %57 = bitcast float %5 to i32 > %58 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %57, 10 > %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %58, float %52, 11 > %60 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %59, float %53, 12 > %61 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %60, float %54, 13 > %62 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %61, float %56, 14 > %63 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %62, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %63 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > 
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..3], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 30, 4, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: BFI TEMP[0].x, IMM[0].xxxx, SV[0].xxxx, IMM[0].xxxx, IMM[0].yyyy > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 3: F2U TEMP[1].xy, TEMP[0].xyyy > 4: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz > 6: UARL ADDR[0].x, TEMP[3].xxxx > 7: MOV TEMP[2], CONST[1][ADDR[0].x] > 8: DP4 TEMP[2].x, IN[1], TEMP[2] > 9: UMUL TEMP[1].x, TEMP[1].yyyy, IMM[2].yyyy > 10: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 11: UARL ADDR[0].x, TEMP[3].xxxx > 12: MOV TEMP[1], CONST[1][ADDR[0].x] > 13: DP4 TEMP[1].x, IN[1], TEMP[1] > 14: MOV TEMP[2].y, TEMP[1].xxxx > 15: SHL TEMP[1].x, SV[0].xxxx, IMM[0].xxxx > 16: U2F TEMP[0].x, TEMP[1].xxxx > 17: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 18: F2U TEMP[0].xy, TEMP[0].xyyy > 19: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 20: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 21: UARL ADDR[0].x, TEMP[3].xxxx > 22: MOV TEMP[1], CONST[1][ADDR[0].x] > 23: DP4 TEMP[1].x, IN[1], TEMP[1] > 24: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 25: USHR TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz > 26: UARL ADDR[0].x, TEMP[3].xxxx > 27: MOV TEMP[0], CONST[1][ADDR[0].x] > 28: DP4 TEMP[0].x, IN[1], TEMP[0] > 29: MOV TEMP[1].y, TEMP[0].xxxx > 30: MOV TEMP[1].zw, IMM[1].wwzw > 31: MOV OUT[2], TEMP[2] > 32: MOV OUT[1], IN[0] > 33: MOV OUT[0], TEMP[1] > 34: END >radeonsi: Compiling shader 127 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = shl i32 %12, 2 > %32 = or i32 %31, 2 > %33 = uitofp i32 %32 to float > %34 = fadd float %33, 0x3FB99999A0000000 > %35 = fadd float %33, 0x3FF19999A0000000 > %36 = fptoui float %34 to i32 > %37 = fptoui float %35 to i32 > %38 = shl i32 %36, 4 > %39 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = shl i32 %36, 4 > %41 = or i32 %40, 4 > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %41) > %43 = shl i32 %36, 4 > %44 = or i32 %43, 8 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = shl i32 %36, 4 > %47 = or i32 %46, 12 > %48 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %47) > %49 = fmul float %27, %39 > %50 = fmul float %28, %42 > %51 = fadd float %49, %50 > %52 = fmul float %29, %45 > %53 = fadd float %51, %52 > %54 = fmul float %30, %48 > %55 = fadd float %53, %54 > %56 = shl i32 %37, 4 > %57 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %56) > %58 = shl i32 %37, 4 > %59 = or i32 %58, 4 > %60 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %59) > %61 = shl i32 %37, 4 > %62 = or i32 %61, 8 > %63 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %62) > %64 = shl i32 %37, 4 > %65 = or i32 %64, 12 > %66 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %65) > %67 = fmul float %27, %57 > %68 = fmul float %28, %60 > %69 = fadd float %67, %68 > %70 = fmul float %29, %63 > %71 = fadd float %69, %70 > %72 = fmul float %30, %66 > %73 = fadd float %71, %72 > %74 = shl i32 %12, 2 > %75 = uitofp i32 %74 to float > %76 = fadd float %75, 0x3FB99999A0000000 > %77 = fadd float %75, 0x3FF19999A0000000 > %78 = fptoui float %76 to i32 > %79 = fptoui float %77 to i32 > %80 = shl i32 %78, 4 > %81 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %80) > %82 = shl i32 %78, 4 > %83 = or i32 %82, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %83) > %85 = shl i32 %78, 4 > %86 = or i32 %85, 8 > %87 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %86) > %88 = shl i32 %78, 4 > %89 = or i32 %88, 12 > %90 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %89) > %91 = fmul float %27, %81 > %92 = fmul float %28, %84 > %93 = fadd float %91, %92 > %94 = fmul float %29, %87 > %95 = fadd float %93, %94 > %96 = fmul float %30, %90 > %97 = fadd float %95, %96 > %98 = shl i32 %79, 4 > %99 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %98) > %100 = shl i32 %79, 4 > %101 = or i32 %100, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %101) > %103 = shl i32 %79, 4 > %104 = or i32 %103, 8 > %105 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %104) > %106 = shl i32 %79, 4 > %107 = or i32 %106, 12 > %108 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %107) > %109 = fmul float %27, %99 > %110 = fmul float %28, %102 > %111 = fadd float %109, %110 > %112 = fmul float %29, %105 > %113 = fadd float %111, %112 > %114 = fmul float %30, %108 > %115 = fadd float %113, %114 > %116 = bitcast i32 %11 to float > %117 = insertvalue <{ float, float, float }> undef, float %116, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %55, float %73, float %45, float %48) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %115, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %117 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > 
prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 3: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 4: MOV TEMP[0].w, TEMP[1].xxxx > 5: MOV OUT[0], TEMP[0] > 6: END >radeonsi: Compiling shader 128 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %35 = bitcast float %33 to i32 > %36 = bitcast float %34 to i32 > %37 = insertelement <2 x i32> undef, i32 %35, i32 0 > %38 = insertelement <2 x i32> %37, i32 %36, i32 1 > %39 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = extractelement <4 x float> %39, i32 3 > %44 = fmul float %43, %32 > %45 = fmul float %44, %40 > %46 = fmul float %44, %41 > %47 = fmul float %44, %42 > %48 = bitcast float %5 to i32 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %48, 10 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %45, 11 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %46, 12 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %47, 13 > %53 = insertvalue 
<{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %44, 14 > %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 96, 4, 2} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {3, 0, 0, 0} > 0: SHL TEMP[0].x, SV[0].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].xy, TEMP[0].xxxx, IMM[1].xyyy > 3: F2U TEMP[1].xy, TEMP[0].xyyy > 4: UADD TEMP[2].x, IMM[0].yyyy, TEMP[1].xxxx > 5: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 6: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz > 7: UARL ADDR[0].x, TEMP[3].xxxx > 8: MOV TEMP[2], CONST[1][ADDR[0].x] > 9: DP4 TEMP[2].x, IN[1], TEMP[2] > 10: UADD TEMP[1].x, IMM[0].yyyy, TEMP[1].yyyy > 11: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 12: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 13: UARL ADDR[0].x, TEMP[3].xxxx > 14: MOV TEMP[1], CONST[1][ADDR[0].x] > 15: DP4 TEMP[1].x, IN[1], TEMP[1] > 16: MOV TEMP[2].y, TEMP[1].xxxx > 17: U2F TEMP[0].x, SV[0].xxxx > 18: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 19: F2U TEMP[0].x, TEMP[0].xxxx > 20: SHL TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww > 21: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 22: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz > 23: UARL ADDR[0].x, TEMP[3].xxxx > 24: MOV TEMP[1], CONST[1][ADDR[0].x] > 25: DP4 TEMP[1].x, IN[1], TEMP[1] > 26: UADD TEMP[3].x, IMM[0].xxxx, TEMP[0].xxxx > 27: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 28: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 29: UARL ADDR[0].x, TEMP[4].xxxx > 30: MOV TEMP[3], CONST[1][ADDR[0].x] > 31: DP4 TEMP[3].x, IN[1], TEMP[3] > 32: MOV TEMP[1].y, TEMP[3].xxxx > 33: UADD TEMP[3].x, IMM[0].wwww, TEMP[0].xxxx > 34: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz > 36: UARL ADDR[0].x, TEMP[4].xxxx > 37: MOV TEMP[3], CONST[1][ADDR[0].x] > 38: DP4 TEMP[3].x, IN[1], TEMP[3] > 39: MOV TEMP[1].z, TEMP[3].xxxx > 40: UADD TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx > 41: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 42: USHR TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz > 43: UARL ADDR[0].x, TEMP[3].xxxx > 44: MOV TEMP[0], CONST[1][ADDR[0].x] > 45: DP4 TEMP[0].x, IN[1], TEMP[0] > 46: MOV TEMP[1].w, TEMP[0].xxxx > 47: MOV OUT[2], TEMP[2] > 48: MOV OUT[1], IN[0] > 49: MOV OUT[0], TEMP[1] > 50: END >radeonsi: Compiling shader 129 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define 
amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = shl i32 %12, 1 > %32 = uitofp i32 %31 to float > %33 = fadd float %32, 0x3FB99999A0000000 > %34 = fadd float %32, 0x3FF19999A0000000 > %35 = fptoui float %33 to i32 > %36 = fptoui float %34 to i32 > %37 = shl i32 %35, 4 > %38 = add i32 %37, 1536 > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = add i32 %37, 1540 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = add i32 %37, 1544 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = add i32 %37, 1548 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = fmul float %27, %39 > %47 = fmul float %28, %41 > %48 = fadd float %46, %47 > %49 = fmul float %29, %43 > %50 = fadd float %48, %49 > %51 = fmul float %30, %45 > %52 = fadd float %50, %51 > %53 = shl i32 %36, 4 > %54 = add i32 %53, 1536 > %55 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %54) > %56 = add i32 %53, 1540 > %57 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %56) > %58 = add i32 %53, 1544 > %59 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %58) > %60 = add i32 %53, 1548 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = fmul float %27, %55 > %63 = fmul float %28, %57 > %64 = fadd float %62, %63 > %65 = fmul float %29, %59 > %66 = fadd float %64, %65 > %67 = fmul float %30, %61 > %68 = fadd float %66, %67 > %69 = uitofp i32 %12 to float > %70 = fadd float %69, 0x3FB99999A0000000 > %71 = fptoui float %70 to i32 > %72 = shl i32 %71, 6 > %73 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %72) > %74 = shl i32 %71, 6 > %75 = or i32 %74, 4 > %76 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %75) > %77 = shl i32 %71, 6 > %78 = or i32 %77, 8 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = shl i32 %71, 6 > %81 = or i32 %80, 12 > %82 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %81) > %83 = fmul float %27, %73 > %84 = fmul float %28, %76 > %85 = fadd float %83, %84 > %86 = fmul float %29, %79 > %87 = fadd float %85, %86 > %88 = fmul float %30, %82 > %89 = fadd float %87, %88 > %90 = shl i32 %71, 2 > %91 = and i32 %90, 268435452 > %92 = or i32 %91, 1 > %93 = shl nuw i32 %92, 4 > %94 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %93) > %95 = shl nuw i32 %92, 4 > %96 = or i32 %95, 4 > %97 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %96) > %98 = shl nuw i32 %92, 4 > %99 = or i32 %98, 8 > %100 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %99) > %101 = shl nuw i32 %92, 4 > %102 = or i32 %101, 12 > %103 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %102) > %104 = fmul float %27, %94 > %105 = fmul float %28, %97 > %106 = fadd float %104, %105 > %107 = fmul float %29, %100 > %108 = fadd float %106, %107 > %109 = fmul float %30, %103 > %110 = fadd float %108, %109 > %111 = shl i32 %71, 2 > %112 = and i32 %111, 268435452 > %113 = or i32 %112, 2 > %114 = shl nuw i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %114) > %116 = shl nuw i32 %113, 4 > %117 = or i32 %116, 4 > %118 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %117) > %119 = shl nuw i32 %113, 4 > %120 = or i32 %119, 8 > %121 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %120) > %122 = shl nuw i32 %113, 4 > %123 = or i32 %122, 12 > %124 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %123) > %125 = fmul float %27, %115 > %126 = fmul float %28, %118 > %127 = fadd float %125, %126 > %128 = fmul float %29, %121 > %129 = fadd float %127, %128 > %130 = fmul float %30, %124 > %131 = fadd float %129, %130 > %132 = shl i32 %71, 2 > %133 = and i32 %132, 268435452 > %134 = or i32 %133, 3 > %135 = shl nuw i32 %134, 4 > %136 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %135) > %137 = shl nuw i32 %134, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %138) > %140 = shl nuw i32 %134, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %141) > %143 = shl nuw i32 %134, 4 > %144 = or i32 %143, 12 > %145 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %144) > %146 = fmul float %27, %136 > %147 = fmul float %28, %139 > %148 = fadd float %146, %147 > %149 = fmul float %29, %142 > %150 = fadd float %148, %149 > %151 = fmul float %30, %145 > %152 = fadd float %150, %151 > %153 = bitcast i32 %11 to float > %154 = insertvalue <{ float, float, float }> undef, float %153, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %52, float %68, float %43, float %45) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %89, float %110, float %131, float %152) > ret <{ float, float, float }> %154 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 3: MOV TEMP[1].w, TEMP[1].xxxx > 4: MOV TEMP[1].xyz, 
TEMP[0].xyzx > 5: MOV OUT[0], TEMP[1] > 6: END >radeonsi: Compiling shader 130 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %35 = bitcast float %33 to i32 > %36 = bitcast float %34 to i32 > %37 = insertelement <2 x i32> undef, i32 %35, i32 0 > %38 = insertelement <2 x i32> %37, i32 %36, i32 1 > %39 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = extractelement <4 x float> %39, i32 3 > %44 = fmul float %43, %32 > %45 = bitcast float %5 to i32 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %45, 10 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %40, 11 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %41, 12 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %42, 13 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %44, 14 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; 
Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0100, 510.0200, 0.1000, 1.1000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {96, 4, 2, 1} >IMM[3] INT32 {3, 0, 0, 0} > 0: FMA TEMP[0].xyz, IN[1].zzzz, IMM[0].xyyy, IMM[0].zzww > 1: F2U TEMP[0].xyz, TEMP[0].xyzz > 2: UADD TEMP[1].x, IMM[2].xxxx, TEMP[0].yyyy > 3: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy > 4: USHR TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 5: UARL ADDR[0].x, TEMP[1].xxxx > 6: UARL ADDR[0].x, TEMP[1].xxxx > 7: MOV TEMP[1], CONST[1][ADDR[0].x] > 8: UADD TEMP[2].x, IMM[2].xxxx, TEMP[0].zzzz > 9: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy > 10: USHR TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 11: UARL ADDR[0].x, TEMP[2].xxxx > 12: UARL ADDR[0].x, TEMP[2].xxxx > 13: MOV TEMP[2], CONST[1][ADDR[0].x] > 14: SHL TEMP[0].x, TEMP[0].xxxx, IMM[2].zzzz > 15: UMUL TEMP[3].x, TEMP[0].xxxx, IMM[1].yyyy > 16: USHR TEMP[4].x, TEMP[3].xxxx, IMM[2].yyyy > 17: UARL ADDR[0].x, TEMP[4].xxxx > 18: MOV TEMP[3], CONST[1][ADDR[0].x] > 19: DP4 TEMP[3].x, IN[2], TEMP[3] > 20: UADD TEMP[4].x, IMM[2].wwww, TEMP[0].xxxx > 21: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[1].yyyy > 22: USHR TEMP[5].x, TEMP[4].xxxx, IMM[2].yyyy > 23: UARL ADDR[0].x, TEMP[5].xxxx > 24: MOV TEMP[4], CONST[1][ADDR[0].x] > 25: DP4 TEMP[4].x, IN[2], TEMP[4] > 26: MOV TEMP[3].y, TEMP[4].xxxx > 27: UADD TEMP[4].x, IMM[2].zzzz, TEMP[0].xxxx > 28: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[1].yyyy > 29: USHR TEMP[5].x, TEMP[4].xxxx, IMM[2].yyyy > 30: UARL ADDR[0].x, TEMP[5].xxxx > 31: MOV TEMP[4], CONST[1][ADDR[0].x] > 32: DP4 TEMP[4].x, IN[2], TEMP[4] > 33: MOV TEMP[3].z, TEMP[4].xxxx > 34: UADD TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx > 35: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy > 36: USHR TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 37: UARL ADDR[0].x, TEMP[4].xxxx > 38: MOV TEMP[0], CONST[1][ADDR[0].x] > 39: DP4 TEMP[0].x, IN[2], TEMP[0] > 40: MOV TEMP[3].w, TEMP[0].xxxx > 41: MOV OUT[4], TEMP[2] > 42: MOV OUT[3], TEMP[1] > 43: MOV OUT[2], IN[1] > 44: MOV OUT[1], IN[0] > 45: MOV OUT[0], TEMP[3] > 46: END >radeonsi: Compiling shader 131 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x 
float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = call float @llvm.fma.f32(float %30, float 0x406FE051E0000000, float 0x3FB99999A0000000) > %40 = call float @llvm.fma.f32(float %30, float 0x407FE051E0000000, float 0x3FB99999A0000000) > %41 = call float @llvm.fma.f32(float %30, float 0x407FE051E0000000, float 0x3FF19999A0000000) > %42 = fptoui float %39 to i32 > %43 = fptoui float %40 to i32 > %44 = fptoui float %41 to i32 > %45 = shl i32 %43, 4 > %46 = add i32 %45, 1536 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = add i32 %45, 1540 > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %48) > %50 = add i32 %45, 1544 > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %50) > %52 = add i32 %45, 1548 > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %52) > %54 = shl i32 %44, 4 > %55 = add i32 %54, 1536 > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %55) > %57 = add i32 %54, 1540 > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %57) > %59 = add i32 %54, 1544 > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %59) > %61 = add i32 %54, 1548 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = shl i32 %42, 6 > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %63) > %65 = shl i32 %42, 6 > %66 = or i32 %65, 4 > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %66) > %68 = shl i32 %42, 6 > %69 = or i32 %68, 8 > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %69) > %71 = shl i32 %42, 6 > %72 = or i32 %71, 12 > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %72) > %74 = fmul float %35, %64 > %75 = fmul float %36, %67 > %76 = fadd float %74, %75 > %77 = fmul float %37, %70 > %78 = fadd float %76, %77 > %79 = fmul float %38, %73 > %80 = fadd float %78, %79 > %81 = shl i32 %42, 2 > %82 = and i32 %81, 268435452 > %83 = or i32 %82, 1 > %84 = shl nuw i32 %83, 4 > %85 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %84) > %86 = shl nuw i32 %83, 4 > %87 = or i32 %86, 4 > %88 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %87) > %89 = shl nuw i32 %83, 4 > %90 = or i32 %89, 8 > %91 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %90) > %92 = shl nuw i32 %83, 4 > %93 = or i32 %92, 12 > %94 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %93) > %95 = fmul float %35, %85 > %96 = fmul float %36, %88 > %97 = fadd float %95, %96 > %98 = fmul float %37, %91 > %99 = fadd float %97, %98 > %100 = fmul float %38, %94 > %101 = fadd float %99, %100 > %102 = shl i32 %42, 2 > %103 = and i32 %102, 268435452 > %104 = or i32 %103, 2 > %105 = shl nuw i32 
%104, 4 > %106 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %105) > %107 = shl nuw i32 %104, 4 > %108 = or i32 %107, 4 > %109 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %108) > %110 = shl nuw i32 %104, 4 > %111 = or i32 %110, 8 > %112 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %111) > %113 = shl nuw i32 %104, 4 > %114 = or i32 %113, 12 > %115 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %114) > %116 = fmul float %35, %106 > %117 = fmul float %36, %109 > %118 = fadd float %116, %117 > %119 = fmul float %37, %112 > %120 = fadd float %118, %119 > %121 = fmul float %38, %115 > %122 = fadd float %120, %121 > %123 = shl i32 %42, 2 > %124 = and i32 %123, 268435452 > %125 = or i32 %124, 3 > %126 = shl nuw i32 %125, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %126) > %128 = shl nuw i32 %125, 4 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %129) > %131 = shl nuw i32 %125, 4 > %132 = or i32 %131, 8 > %133 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %132) > %134 = shl nuw i32 %125, 4 > %135 = or i32 %134, 12 > %136 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %135) > %137 = fmul float %35, %127 > %138 = fmul float %36, %130 > %139 = fadd float %137, %138 > %140 = fmul float %37, %133 > %141 = fadd float %139, %140 > %142 = fmul float %38, %136 > %143 = fadd float %141, %142 > %144 = bitcast i32 %11 to float > %145 = insertvalue <{ float, float, float }> undef, float %144, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %47, float %49, float %51, float %53) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %56, float %58, float %60, float %62) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %101, float %122, float %143) > ret <{ float, float, float }> %145 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 1020.0400, 0.0000, 1.0000, 0.1000} >IMM[1] FLT32 { 2.1000, 3.1000, 0.1000, 1.1000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {4, 0, 0, 0} > 0: FMA TEMP[0], IN[1].zzzz, IMM[0].xxxx, IMM[1] > 1: F2U TEMP[0], TEMP[0] > 2: UMUL TEMP[1].x, TEMP[0].zzzz, IMM[2].yyyy > 3: USHR TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx > 4: UARL ADDR[0].x, TEMP[1].xxxx > 5: UARL ADDR[0].x, TEMP[1].xxxx > 6: MOV TEMP[1], CONST[1][ADDR[0].x] > 7: UMUL TEMP[2].x, TEMP[0].wwww, IMM[2].yyyy > 8: USHR TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 9: UARL ADDR[0].x, TEMP[2].xxxx > 10: UARL ADDR[0].x, TEMP[2].xxxx > 11: MOV TEMP[2], CONST[1][ADDR[0].x] > 12: UMUL TEMP[3].x, 
TEMP[0].xxxx, IMM[2].yyyy > 13: USHR TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx > 14: UARL ADDR[0].x, TEMP[4].xxxx > 15: MOV TEMP[3], CONST[1][ADDR[0].x] > 16: DP4 TEMP[3].x, IN[2], TEMP[3] > 17: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 18: USHR TEMP[4].x, TEMP[0].xxxx, IMM[3].xxxx > 19: UARL ADDR[0].x, TEMP[4].xxxx > 20: MOV TEMP[0], CONST[1][ADDR[0].x] > 21: DP4 TEMP[0].x, IN[2], TEMP[0] > 22: MOV TEMP[3].y, TEMP[0].xxxx > 23: MOV TEMP[3].zw, IMM[0].zzyz > 24: MOV OUT[4], TEMP[2] > 25: MOV OUT[3], TEMP[1] > 26: MOV OUT[2], IN[1] > 27: MOV OUT[1], IN[0] > 28: MOV OUT[0], TEMP[3] > 29: END >radeonsi: Compiling shader 132 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = call float @llvm.fma.f32(float %30, float 0x408FE051E0000000, float 0x4000CCCCC0000000) > %40 = call float @llvm.fma.f32(float %30, float 0x408FE051E0000000, float 0x4008CCCCC0000000) > %41 = call float @llvm.fma.f32(float %30, float 0x408FE051E0000000, float 0x3FB99999A0000000) > %42 = call float @llvm.fma.f32(float %30, float 0x408FE051E0000000, float 0x3FF19999A0000000) > %43 = fptoui float %39 to i32 > %44 = fptoui float %40 to i32 > %45 = fptoui float %41 to i32 > %46 = fptoui float %42 to i32 > %47 = shl i32 %45, 4 > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %47) > %49 = shl i32 %45, 4 > %50 = or i32 %49, 4 > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %50) > %52 = shl i32 %45, 4 > %53 = or i32 %52, 8 > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %53) > %55 = shl i32 %45, 4 > %56 = or i32 %55, 12 > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %56) > %58 = shl i32 %46, 4 > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %58) > %60 = shl i32 %46, 4 > %61 
= or i32 %60, 4 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = shl i32 %46, 4 > %64 = or i32 %63, 8 > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %64) > %66 = shl i32 %46, 4 > %67 = or i32 %66, 12 > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %67) > %69 = shl i32 %43, 4 > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %69) > %71 = shl i32 %43, 4 > %72 = or i32 %71, 4 > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %72) > %74 = shl i32 %43, 4 > %75 = or i32 %74, 8 > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %75) > %77 = shl i32 %43, 4 > %78 = or i32 %77, 12 > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %78) > %80 = fmul float %35, %70 > %81 = fmul float %36, %73 > %82 = fadd float %80, %81 > %83 = fmul float %37, %76 > %84 = fadd float %82, %83 > %85 = fmul float %38, %79 > %86 = fadd float %84, %85 > %87 = shl i32 %44, 4 > %88 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %87) > %89 = shl i32 %44, 4 > %90 = or i32 %89, 4 > %91 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %90) > %92 = shl i32 %44, 4 > %93 = or i32 %92, 8 > %94 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %93) > %95 = shl i32 %44, 4 > %96 = or i32 %95, 12 > %97 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %96) > %98 = fmul float %35, %88 > %99 = fmul float %36, %91 > %100 = fadd float %98, %99 > %101 = fmul float %37, %94 > %102 = fadd float %100, %101 > %103 = fmul float %38, %97 > %104 = fadd float %102, %103 > %105 = bitcast i32 %11 to float > %106 = insertvalue <{ float, float, float }> undef, float %105, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %48, float %51, float %54, float %57) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %59, float %62, float %65, float %68) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %86, float %104, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %106 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..3], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0100, 0.1000, 0.0000, 0.0000} >IMM[1] INT32 {2, 4, 1, 3} >IMM[2] UINT32 {0, 16, 0, 0} > 0: FMA TEMP[0].x, IN[1].zzzz, IMM[0].xxxx, IMM[0].yyyy > 1: F2U TEMP[0].x, TEMP[0].xxxx > 2: SHL TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[2].yyyy > 4: USHR TEMP[2].x, TEMP[1].xxxx, IMM[1].yyyy > 5: UARL ADDR[0].x, TEMP[2].xxxx > 6: MOV TEMP[1], CONST[1][ADDR[0].x] > 7: DP4 TEMP[1].x, IN[2], TEMP[1] > 8: UADD TEMP[2].x, IMM[1].zzzz, TEMP[0].xxxx > 9: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 10: USHR 
TEMP[3].x, TEMP[2].xxxx, IMM[1].yyyy > 11: UARL ADDR[0].x, TEMP[3].xxxx > 12: MOV TEMP[2], CONST[1][ADDR[0].x] > 13: DP4 TEMP[2].x, IN[2], TEMP[2] > 14: MOV TEMP[1].y, TEMP[2].xxxx > 15: UADD TEMP[2].x, IMM[1].xxxx, TEMP[0].xxxx > 16: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 17: USHR TEMP[3].x, TEMP[2].xxxx, IMM[1].yyyy > 18: UARL ADDR[0].x, TEMP[3].xxxx > 19: MOV TEMP[2], CONST[1][ADDR[0].x] > 20: DP4 TEMP[2].x, IN[2], TEMP[2] > 21: MOV TEMP[1].z, TEMP[2].xxxx > 22: UADD TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx > 23: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 24: USHR TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy > 25: UARL ADDR[0].x, TEMP[2].xxxx > 26: MOV TEMP[0], CONST[1][ADDR[0].x] > 27: DP4 TEMP[0].x, IN[2], TEMP[0] > 28: MOV TEMP[1].w, TEMP[0].xxxx > 29: MOV OUT[2], IN[1] > 30: MOV OUT[1], IN[0] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 133 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = call float @llvm.fma.f32(float %30, float 0x406FE051E0000000, float 0x3FB99999A0000000) > %40 = fptoui float %39 to i32 > %41 = shl i32 %40, 6 > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %41) > %43 = shl i32 %40, 6 > %44 = or i32 %43, 4 > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %44) > %46 = shl i32 %40, 6 > %47 = or i32 %46, 8 > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %47) > %49 = shl i32 %40, 6 > %50 = or i32 %49, 12 > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %50) > %52 = fmul float %35, %42 > %53 = fmul float %36, %45 > %54 = fadd float %52, %53 > %55 = fmul float %37, %48 > %56 = fadd float %54, %55 > %57 = fmul float %38, %51 > %58 = fadd float %56, %57 > %59 = shl i32 %40, 2 > %60 = and i32 %59, 268435452 > 
%61 = or i32 %60, 1 > %62 = shl nuw i32 %61, 4 > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %62) > %64 = shl nuw i32 %61, 4 > %65 = or i32 %64, 4 > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %65) > %67 = shl nuw i32 %61, 4 > %68 = or i32 %67, 8 > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %68) > %70 = shl nuw i32 %61, 4 > %71 = or i32 %70, 12 > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %71) > %73 = fmul float %35, %63 > %74 = fmul float %36, %66 > %75 = fadd float %73, %74 > %76 = fmul float %37, %69 > %77 = fadd float %75, %76 > %78 = fmul float %38, %72 > %79 = fadd float %77, %78 > %80 = shl i32 %40, 2 > %81 = and i32 %80, 268435452 > %82 = or i32 %81, 2 > %83 = shl nuw i32 %82, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %83) > %85 = shl nuw i32 %82, 4 > %86 = or i32 %85, 4 > %87 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %86) > %88 = shl nuw i32 %82, 4 > %89 = or i32 %88, 8 > %90 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %89) > %91 = shl nuw i32 %82, 4 > %92 = or i32 %91, 12 > %93 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %92) > %94 = fmul float %35, %84 > %95 = fmul float %36, %87 > %96 = fadd float %94, %95 > %97 = fmul float %37, %90 > %98 = fadd float %96, %97 > %99 = fmul float %38, %93 > %100 = fadd float %98, %99 > %101 = shl i32 %40, 2 > %102 = and i32 %101, 268435452 > %103 = or i32 %102, 3 > %104 = shl nuw i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %104) > %106 = shl nuw i32 %103, 4 > %107 = or i32 %106, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %107) > %109 = shl nuw i32 %103, 4 > %110 = or i32 %109, 8 > %111 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %110) > %112 = shl nuw i32 %103, 4 > %113 = or i32 %112, 12 > %114 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %113) > %115 = fmul float %35, %105 > %116 = fmul float %36, %108 > %117 = fadd float %115, %116 > %118 = fmul float %37, %111 > %119 = fadd float %117, %118 > %120 = fmul float %38, %114 > %121 = fadd float %119, %120 > %122 = bitcast i32 %11 to float > %123 = insertvalue <{ float, float, float }> undef, float %122, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float %100, float %121) > ret <{ float, float, float }> %123 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..2], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 510.0200, 0.1000, 1.1000, 0.0000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {4, 0, 0, 0} >IMM[3] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: FMA TEMP[0].xy, IN[1].zzzz, IMM[0].xxxx, IMM[0].yzzz > 1: F2U 
TEMP[0].xy, TEMP[0].xyyy > 2: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy > 3: USHR TEMP[2].x, TEMP[1].xxxx, IMM[2].xxxx > 4: UARL ADDR[0].x, TEMP[2].xxxx > 5: MOV TEMP[1], CONST[1][ADDR[0].x] > 6: DP4 TEMP[1].x, IN[2], TEMP[1] > 7: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[1].yyyy > 8: USHR TEMP[2].x, TEMP[0].xxxx, IMM[2].xxxx > 9: UARL ADDR[0].x, TEMP[2].xxxx > 10: MOV TEMP[0], CONST[1][ADDR[0].x] > 11: DP4 TEMP[0].x, IN[2], TEMP[0] > 12: MOV TEMP[1].y, TEMP[0].xxxx > 13: MOV TEMP[1].zw, IMM[3].yyxy > 14: MOV OUT[2], IN[1] > 15: MOV OUT[1], IN[0] > 16: MOV OUT[0], TEMP[1] > 17: END >radeonsi: Compiling shader 134 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = extractelement <4 x float> %20, i32 2 > %24 = extractelement <4 x float> %20, i32 3 > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %14) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = extractelement <4 x float> %27, i32 2 > %31 = extractelement <4 x float> %27, i32 3 > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %15) > %35 = extractelement <4 x float> %34, i32 0 > %36 = extractelement <4 x float> %34, i32 1 > %37 = extractelement <4 x float> %34, i32 2 > %38 = extractelement <4 x float> %34, i32 3 > %39 = call float @llvm.fma.f32(float %30, float 0x407FE051E0000000, float 0x3FB99999A0000000) > %40 = call float @llvm.fma.f32(float %30, float 0x407FE051E0000000, float 0x3FF19999A0000000) > %41 = fptoui float %39 to i32 > %42 = fptoui float %40 to i32 > %43 = shl i32 %41, 4 > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %43) > %45 = shl i32 %41, 4 > %46 = or i32 %45, 4 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = shl i32 %41, 4 > %49 = or i32 %48, 8 > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %49) > %51 = shl i32 %41, 4 > %52 = or i32 %51, 12 > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %52) > %54 = fmul float %35, %44 > %55 = fmul float %36, %47 > %56 = fadd float %54, %55 > %57 = fmul float %37, %50 > %58 = fadd float %56, %57 > %59 = fmul float %38, %53 > %60 = fadd float %58, %59 > %61 = shl i32 %42, 4 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = shl i32 %42, 4 > %64 = or i32 %63, 4 > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 
%64) > %66 = shl i32 %42, 4 > %67 = or i32 %66, 8 > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %67) > %69 = shl i32 %42, 4 > %70 = or i32 %69, 12 > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %70) > %72 = fmul float %35, %62 > %73 = fmul float %36, %65 > %74 = fadd float %72, %73 > %75 = fmul float %37, %68 > %76 = fadd float %74, %75 > %77 = fmul float %38, %71 > %78 = fadd float %76, %77 > %79 = bitcast i32 %11 to float > %80 = insertvalue <{ float, float, float }> undef, float %79, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %28, float %29, float %30, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %78, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %80 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[1].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: MOV TEMP[1].xy, IN[2].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: MUL TEMP[0], TEMP[1], TEMP[0] > 5: MUL TEMP[0], TEMP[0], IN[1].wwww > 6: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 7: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[0].xyzz > 8: MOV TEMP[1].w, TEMP[0].wwww > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 135 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float 
@llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = bitcast float %40 to i32 > %43 = bitcast float %41 to i32 > %44 = insertelement <2 x i32> undef, i32 %42, i32 0 > %45 = insertelement <2 x i32> %44, i32 %43, i32 1 > %46 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %45, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %47 = extractelement <4 x float> %46, i32 0 > %48 = extractelement <4 x float> %46, i32 1 > %49 = extractelement <4 x float> %46, i32 2 > %50 = extractelement <4 x float> %46, i32 3 > %51 = fmul float %47, %36 > %52 = fmul float %48, %37 > %53 = fmul float %49, %38 > %54 = fmul float %51, %39 > %55 = fmul float %52, %39 > %56 = fmul float %53, %39 > %57 = fmul float %50, %39 > %58 = call float @llvm.fma.f32(float %32, float %57, float %54) > %59 = call float @llvm.fma.f32(float %33, float %57, float %55) > %60 = call float @llvm.fma.f32(float %34, float %57, float %56) > %61 = call float @llvm.fma.f32(float %35, float %57, float %57) > %62 = fmul float %61, %58 > %63 = fmul float %61, %59 > %64 = fmul float %61, %60 > %65 = bitcast float %5 to i32 > %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %65, 10 > %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %62, 11 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %63, 12 > %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %64, 13 > %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %61, 14 > %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { 
"InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..1], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[1].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: MOV TEMP[1].xy, IN[2].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: MUL TEMP[0], TEMP[1], TEMP[0] > 5: MUL TEMP[0], TEMP[0], IN[1].wwww > 6: FMA TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] > 7: MOV OUT[0], TEMP[0] > 8: END >radeonsi: Compiling shader 136 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = bitcast float %40 to i32 > %43 = bitcast float %41 to i32 > %44 = insertelement <2 x i32> undef, i32 %42, i32 0 > %45 = insertelement <2 x i32> %44, i32 %43, i32 1 > %46 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %45, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %47 = extractelement <4 x float> %46, i32 0 > %48 = extractelement <4 x float> %46, i32 1 > %49 = extractelement <4 x float> %46, i32 2 > %50 = extractelement <4 x float> %46, i32 3 > %51 = fmul float %47, %36 > %52 = fmul float %48, %37 > %53 = fmul float %49, %38 > %54 = fmul float %51, %39 > %55 = 
fmul float %52, %39 > %56 = fmul float %53, %39 > %57 = fmul float %50, %39 > %58 = call float @llvm.fma.f32(float %32, float %57, float %54) > %59 = call float @llvm.fma.f32(float %33, float %57, float %55) > %60 = call float @llvm.fma.f32(float %34, float %57, float %56) > %61 = call float @llvm.fma.f32(float %35, float %57, float %57) > %62 = bitcast float %5 to i32 > %63 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %62, 10 > %64 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %63, float %58, 11 > %65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %64, float %59, 12 > %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %65, float %60, 13 > %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %61, 14 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..6], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0100, 1020.0400, 1.1000, 2.1000} >IMM[1] FLT32 { 0.1000, 2.1000, 3.1000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {96, 4, 2, 1} >IMM[4] INT32 {3, 0, 0, 0} > 0: FMA TEMP[0], IN[0].zzzz, IMM[0].xyyy, IMM[1].xyzx > 1: F2U TEMP[0], TEMP[0] > 2: UADD TEMP[1].x, IMM[3].xxxx, TEMP[0].wwww > 3: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 4: USHR TEMP[1].x, TEMP[1].xxxx, IMM[3].yyyy > 5: UARL ADDR[0].x, TEMP[1].xxxx > 6: UARL ADDR[0].x, TEMP[1].xxxx > 7: MOV TEMP[1], CONST[1][ADDR[0].x] > 8: FMA TEMP[2].x, IN[0].zzzz, IMM[0].yyyy, IMM[0].zzzz > 9: F2U TEMP[2].x, TEMP[2].xxxx > 10: UADD TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx > 11: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 12: USHR TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy > 13: UARL ADDR[0].x, TEMP[2].xxxx > 14: UARL ADDR[0].x, TEMP[2].xxxx > 15: MOV TEMP[2], CONST[1][ADDR[0].x] > 16: UADD TEMP[3].x, IMM[3].xxxx, TEMP[0].yyyy > 17: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 18: USHR TEMP[4].x, TEMP[3].xxxx, 
IMM[3].yyyy > 19: UARL ADDR[0].x, TEMP[4].xxxx > 20: MOV TEMP[3], CONST[1][ADDR[0].x] > 21: DP4 TEMP[3].x, IN[1], TEMP[3] > 22: UADD TEMP[4].x, IMM[3].xxxx, TEMP[0].zzzz > 23: UMUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy > 24: USHR TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy > 25: UARL ADDR[0].x, TEMP[5].xxxx > 26: MOV TEMP[4], CONST[1][ADDR[0].x] > 27: DP4 TEMP[4].x, IN[1], TEMP[4] > 28: MOV TEMP[3].y, TEMP[4].xxxx > 29: SHL TEMP[0].x, TEMP[0].xxxx, IMM[3].zzzz > 30: UMUL TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 31: USHR TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy > 32: UARL ADDR[0].x, TEMP[5].xxxx > 33: MOV TEMP[4], CONST[1][ADDR[0].x] > 34: DP4 TEMP[4].x, IN[1], TEMP[4] > 35: UADD TEMP[5].x, IMM[3].wwww, TEMP[0].xxxx > 36: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 37: USHR TEMP[6].x, TEMP[5].xxxx, IMM[3].yyyy > 38: UARL ADDR[0].x, TEMP[6].xxxx > 39: MOV TEMP[5], CONST[1][ADDR[0].x] > 40: DP4 TEMP[5].x, IN[1], TEMP[5] > 41: MOV TEMP[4].y, TEMP[5].xxxx > 42: UADD TEMP[5].x, IMM[3].zzzz, TEMP[0].xxxx > 43: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 44: USHR TEMP[6].x, TEMP[5].xxxx, IMM[3].yyyy > 45: UARL ADDR[0].x, TEMP[6].xxxx > 46: MOV TEMP[5], CONST[1][ADDR[0].x] > 47: DP4 TEMP[5].x, IN[1], TEMP[5] > 48: MOV TEMP[4].z, TEMP[5].xxxx > 49: UADD TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx > 50: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 51: USHR TEMP[5].x, TEMP[0].xxxx, IMM[3].yyyy > 52: UARL ADDR[0].x, TEMP[5].xxxx > 53: MOV TEMP[0], CONST[1][ADDR[0].x] > 54: DP4 TEMP[0].x, IN[1], TEMP[0] > 55: MOV TEMP[4].w, TEMP[0].xxxx > 56: MOV OUT[4], TEMP[3] > 57: MOV OUT[3], TEMP[2] > 58: MOV OUT[2], TEMP[1] > 59: MOV OUT[1], IN[0] > 60: MOV OUT[0], TEMP[4] > 61: END >radeonsi: Compiling shader 137 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = call float @llvm.fma.f32(float %22, float 0x406FE051E0000000, float 0x3FB99999A0000000) > %32 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x4000CCCCC0000000) > %33 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x4008CCCCC0000000) > %34 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x3FB99999A0000000) > %35 = 
fptoui float %31 to i32 > %36 = fptoui float %32 to i32 > %37 = fptoui float %33 to i32 > %38 = fptoui float %34 to i32 > %39 = shl i32 %38, 4 > %40 = add i32 %39, 1536 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = add i32 %39, 1540 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = add i32 %39, 1544 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = add i32 %39, 1548 > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %46) > %48 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x3FF19999A0000000) > %49 = fptoui float %48 to i32 > %50 = shl i32 %49, 4 > %51 = add i32 %50, 1536 > %52 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %51) > %53 = add i32 %50, 1540 > %54 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %53) > %55 = add i32 %50, 1544 > %56 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %55) > %57 = add i32 %50, 1548 > %58 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %57) > %59 = shl i32 %36, 4 > %60 = add i32 %59, 1536 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = add i32 %59, 1540 > %63 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %62) > %64 = add i32 %59, 1544 > %65 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %64) > %66 = add i32 %59, 1548 > %67 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %66) > %68 = fmul float %27, %61 > %69 = fmul float %28, %63 > %70 = fadd float %68, %69 > %71 = fmul float %29, %65 > %72 = fadd float %70, %71 > %73 = fmul float %30, %67 > %74 = fadd float %72, %73 > %75 = shl i32 %37, 4 > %76 = add i32 %75, 1536 > %77 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %76) > %78 = add i32 %75, 1540 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = add i32 %75, 1544 > %81 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %80) > %82 = add i32 %75, 1548 > %83 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %82) > %84 = fmul float %27, %77 > %85 = fmul float %28, %79 > %86 = fadd float %84, %85 > %87 = fmul float %29, %81 > %88 = fadd float %86, %87 > %89 = fmul float %30, %83 > %90 = fadd float %88, %89 > %91 = shl i32 %35, 6 > %92 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %91) > %93 = shl i32 %35, 6 > %94 = or i32 %93, 4 > %95 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %94) > %96 = shl i32 %35, 6 > %97 = or i32 %96, 8 > %98 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %97) > %99 = shl i32 %35, 6 > %100 = or i32 %99, 12 > %101 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %100) > %102 = fmul float %27, %92 > %103 = fmul float %28, %95 > %104 = fadd float %102, %103 > %105 = fmul float %29, %98 > %106 = fadd float %104, %105 > %107 = fmul float %30, %101 > %108 = fadd float %106, %107 > %109 = shl i32 %35, 2 > %110 = and i32 %109, 268435452 > %111 = or i32 %110, 1 > %112 = shl nuw i32 %111, 4 > %113 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %112) > %114 = shl nuw i32 %111, 4 > %115 = or i32 %114, 4 > %116 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %115) > %117 = shl nuw i32 %111, 4 > %118 = or i32 %117, 8 > %119 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %118) > %120 = shl nuw i32 %111, 4 > %121 = or i32 %120, 12 > %122 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %121) > %123 = fmul float %27, %113 > %124 = fmul float %28, %116 > %125 = fadd float %123, %124 > %126 = fmul float %29, %119 > %127 = fadd float %125, %126 > %128 = fmul float %30, %122 > %129 = fadd float %127, %128 > %130 = shl i32 %35, 2 > %131 
= and i32 %130, 268435452 > %132 = or i32 %131, 2 > %133 = shl nuw i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %133) > %135 = shl nuw i32 %132, 4 > %136 = or i32 %135, 4 > %137 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %136) > %138 = shl nuw i32 %132, 4 > %139 = or i32 %138, 8 > %140 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %139) > %141 = shl nuw i32 %132, 4 > %142 = or i32 %141, 12 > %143 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %142) > %144 = fmul float %27, %134 > %145 = fmul float %28, %137 > %146 = fadd float %144, %145 > %147 = fmul float %29, %140 > %148 = fadd float %146, %147 > %149 = fmul float %30, %143 > %150 = fadd float %148, %149 > %151 = shl i32 %35, 2 > %152 = and i32 %151, 268435452 > %153 = or i32 %152, 3 > %154 = shl nuw i32 %153, 4 > %155 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %154) > %156 = shl nuw i32 %153, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %157) > %159 = shl nuw i32 %153, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %160) > %162 = shl nuw i32 %153, 4 > %163 = or i32 %162, 12 > %164 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %163) > %165 = fmul float %27, %155 > %166 = fmul float %28, %158 > %167 = fadd float %165, %166 > %168 = fmul float %29, %161 > %169 = fadd float %167, %168 > %170 = fmul float %30, %164 > %171 = fadd float %169, %170 > %172 = bitcast i32 %11 to float > %173 = insertvalue <{ float, float, float }> undef, float %172, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %43, float %45, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %52, float %54, float %56, float %58) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %90, float %65, float %67) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %108, float %129, float %150, float %171) > ret <{ float, float, float }> %173 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 1530.0599, 0.1000, 1.1000, 0.0000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {4, 0, 0, 0} >IMM[3] FLT32 { 2.1000, 3.1000, 4.1000, 5.1000} >IMM[4] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: FMA TEMP[0].xy, IN[0].zzzz, IMM[0].xxxx, IMM[0].yzzz > 1: F2U TEMP[0].xy, TEMP[0].xyyy > 2: UMUL TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy > 3: USHR TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 4: UARL ADDR[0].x, TEMP[1].xxxx > 5: UARL ADDR[0].x, TEMP[1].xxxx > 6: MOV TEMP[1], CONST[1][ADDR[0].x] > 7: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[1].yyyy > 8: USHR TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx > 9: UARL 
ADDR[0].x, TEMP[0].xxxx > 10: UARL ADDR[0].x, TEMP[0].xxxx > 11: MOV TEMP[0], CONST[1][ADDR[0].x] > 12: FMA TEMP[2], IN[0].zzzz, IMM[0].xxxx, IMM[3] > 13: F2U TEMP[2], TEMP[2] > 14: UMUL TEMP[3].x, TEMP[2].zzzz, IMM[1].yyyy > 15: USHR TEMP[4].x, TEMP[3].xxxx, IMM[2].xxxx > 16: UARL ADDR[0].x, TEMP[4].xxxx > 17: MOV TEMP[3], CONST[1][ADDR[0].x] > 18: DP4 TEMP[3].x, IN[1], TEMP[3] > 19: UMUL TEMP[4].x, TEMP[2].wwww, IMM[1].yyyy > 20: USHR TEMP[5].x, TEMP[4].xxxx, IMM[2].xxxx > 21: UARL ADDR[0].x, TEMP[5].xxxx > 22: MOV TEMP[4], CONST[1][ADDR[0].x] > 23: DP4 TEMP[4].x, IN[1], TEMP[4] > 24: MOV TEMP[3].y, TEMP[4].xxxx > 25: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[1].yyyy > 26: USHR TEMP[5].x, TEMP[4].xxxx, IMM[2].xxxx > 27: UARL ADDR[0].x, TEMP[5].xxxx > 28: MOV TEMP[4], CONST[1][ADDR[0].x] > 29: DP4 TEMP[4].x, IN[1], TEMP[4] > 30: UMUL TEMP[2].x, TEMP[2].yyyy, IMM[1].yyyy > 31: USHR TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx > 32: UARL ADDR[0].x, TEMP[5].xxxx > 33: MOV TEMP[2], CONST[1][ADDR[0].x] > 34: DP4 TEMP[2].x, IN[1], TEMP[2] > 35: MOV TEMP[4].y, TEMP[2].xxxx > 36: MOV TEMP[4].zw, IMM[4].yyxy > 37: MOV OUT[4], TEMP[3] > 38: MOV OUT[3], TEMP[0] > 39: MOV OUT[2], TEMP[1] > 40: MOV OUT[1], IN[0] > 41: MOV OUT[0], TEMP[4] > 42: END >radeonsi: Compiling shader 138 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x3FB99999A0000000) > %32 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x3FF19999A0000000) > %33 = fptoui float %31 to i32 > %34 = fptoui float %32 to i32 > %35 = shl i32 %33, 4 > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %35) > %37 = shl i32 %33, 4 > %38 = or i32 %37, 4 > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = shl i32 %33, 4 > %41 = or i32 %40, 8 > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %41) > %43 = shl i32 %33, 4 > %44 = or i32 %43, 12 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = shl i32 %34, 4 > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %46) > %48 = shl i32 %34, 4 > %49 = or i32 %48, 4 > %50 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %49) > %51 = shl i32 %34, 4 > %52 = or i32 %51, 8 > %53 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %52) > %54 = shl i32 %34, 4 > %55 = or i32 %54, 12 > %56 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %55) > %57 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x4000CCCCC0000000) > %58 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x4008CCCCC0000000) > %59 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x4010666660000000) > %60 = call float @llvm.fma.f32(float %22, float 0x4097E83D60000000, float 0x4014666660000000) > %61 = fptoui float %57 to i32 > %62 = fptoui float %58 to i32 > %63 = fptoui float %59 to i32 > %64 = fptoui float %60 to i32 > %65 = shl i32 %63, 4 > %66 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %65) > %67 = shl i32 %63, 4 > %68 = or i32 %67, 4 > %69 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %68) > %70 = shl i32 %63, 4 > %71 = or i32 %70, 8 > %72 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %71) > %73 = shl i32 %63, 4 > %74 = or i32 %73, 12 > %75 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %74) > %76 = fmul float %27, %66 > %77 = fmul float %28, %69 > %78 = fadd float %76, %77 > %79 = fmul float %29, %72 > %80 = fadd float %78, %79 > %81 = fmul float %30, %75 > %82 = fadd float %80, %81 > %83 = shl i32 %64, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %83) > %85 = shl i32 %64, 4 > %86 = or i32 %85, 4 > %87 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %86) > %88 = shl i32 %64, 4 > %89 = or i32 %88, 8 > %90 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %89) > %91 = shl i32 %64, 4 > %92 = or i32 %91, 12 > %93 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %92) > %94 = fmul float %27, %84 > %95 = fmul float %28, %87 > %96 = fadd float %94, %95 > %97 = fmul float %29, %90 > %98 = fadd float %96, %97 > %99 = fmul float %30, %93 > %100 = fadd float %98, %99 > %101 = shl i32 %61, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %101) > %103 = shl i32 %61, 4 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %104) > %106 = shl i32 %61, 4 > %107 = or i32 %106, 8 > %108 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %107) > %109 = shl i32 %61, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %110) > %112 = fmul float %27, %102 > %113 = fmul float %28, %105 > %114 = fadd float %112, %113 > %115 = fmul float %29, %108 > %116 = fadd float %114, %115 > %117 = fmul float %30, %111 > %118 = fadd float %116, %117 > %119 = shl i32 %62, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %119) > %121 = shl i32 %62, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %122) > %124 = shl i32 %62, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %125) > %127 = shl i32 %62, 4 > %128 = or i32 %127, 12 > %129 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %128) > %130 = fmul float %27, %120 > %131 = fmul float %28, %123 > %132 = fadd float %130, %131 > %133 = fmul float %29, %126 > %134 = fadd float %132, %133 > %135 = fmul float %30, %129 > %136 = fadd float %134, %135 > %137 = bitcast i32 %11 to float > %138 = insertvalue <{ float, float, float }> undef, float %137, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float 
%36, float %39, float %42, float %45) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %47, float %50, float %53, float %56) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %82, float %100, float %72, float %75) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %118, float %136, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %138 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..4095] >DCL TEMP[0..6], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 96, 4, 31} >IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {2, 3, 0, 0} > 0: SHL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UADD TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx > 5: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 6: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: UARL ADDR[0].x, TEMP[1].xxxx > 9: MOV TEMP[1], CONST[1][ADDR[0].x] > 10: BFI TEMP[2].x, IMM[0].xxxx, IN[2].xxxx, IMM[0].xxxx, IMM[0].wwww > 11: U2F TEMP[0].x, TEMP[2].xxxx > 12: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 13: F2U TEMP[2].x, TEMP[0].xxxx > 14: UADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx > 15: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy > 16: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 17: UARL ADDR[0].x, TEMP[2].xxxx > 18: UARL ADDR[0].x, TEMP[2].xxxx > 19: MOV TEMP[2], CONST[1][ADDR[0].x] > 20: MOV TEMP[3].xy, IN[0].xyxx > 21: U2F TEMP[0].x, IN[2].xxxx > 22: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 23: F2U TEMP[0].x, TEMP[0].xxxx > 24: SHL TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx > 25: UMUL TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 26: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz > 27: UARL ADDR[0].x, TEMP[5].xxxx > 28: MOV TEMP[4], CONST[1][ADDR[0].x] > 29: DP4 TEMP[4].x, IN[1], TEMP[4] > 30: UADD TEMP[5].x, IMM[0].xxxx, TEMP[0].xxxx > 31: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 32: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].zzzz > 33: UARL ADDR[0].x, TEMP[6].xxxx > 34: MOV TEMP[5], CONST[1][ADDR[0].x] > 35: DP4 TEMP[5].x, IN[1], TEMP[5] > 36: MOV TEMP[4].y, TEMP[5].xxxx > 37: UADD TEMP[5].x, IMM[3].xxxx, TEMP[0].xxxx > 38: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 39: USHR TEMP[6].x, TEMP[5].xxxx, IMM[0].zzzz > 40: UARL ADDR[0].x, TEMP[6].xxxx > 41: MOV TEMP[5], CONST[1][ADDR[0].x] > 42: DP4 TEMP[5].x, IN[1], TEMP[5] > 43: MOV TEMP[4].z, TEMP[5].xxxx > 44: UADD TEMP[0].x, IMM[3].yyyy, TEMP[0].xxxx > 45: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy > 46: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].zzzz > 47: UARL ADDR[0].x, TEMP[5].xxxx > 48: MOV TEMP[0], CONST[1][ADDR[0].x] > 49: DP4 TEMP[0].x, IN[1], TEMP[0] > 50: MOV TEMP[4].w, TEMP[0].xxxx > 51: MOV OUT[3], TEMP[3] > 52: MOV OUT[2], TEMP[2] > 53: MOV OUT[1], TEMP[1] > 54: MOV 
OUT[0], TEMP[4] > 55: END >radeonsi: Compiling shader 139 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %14) > %26 = extractelement <4 x float> %25, i32 0 > %27 = extractelement <4 x float> %25, i32 1 > %28 = extractelement <4 x float> %25, i32 2 > %29 = extractelement <4 x float> %25, i32 3 > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %15) > %33 = extractelement <4 x float> %32, i32 0 > %34 = bitcast float %33 to i32 > %35 = shl i32 %34, 1 > %36 = uitofp i32 %35 to float > %37 = fadd float %36, 0x3FB99999A0000000 > %38 = fptoui float %37 to i32 > %39 = shl i32 %38, 4 > %40 = add i32 %39, 1536 > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %40) > %42 = add i32 %39, 1540 > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %42) > %44 = add i32 %39, 1544 > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %44) > %46 = add i32 %39, 1548 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = bitcast float %33 to i32 > %49 = shl i32 %48, 1 > %50 = or i32 %49, 1 > %51 = uitofp i32 %50 to float > %52 = fadd float %51, 0x3FB99999A0000000 > %53 = fptoui float %52 to i32 > %54 = shl i32 %53, 4 > %55 = add i32 %54, 1536 > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %55) > %57 = add i32 %54, 1540 > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %57) > %59 = add i32 %54, 1544 > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %59) > %61 = add i32 %54, 1548 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = bitcast float %33 to i32 > %64 = uitofp i32 %63 to float > %65 = fadd float %64, 0x3FB99999A0000000 > %66 = fptoui float %65 to i32 > %67 = shl i32 %66, 6 > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %67) > %69 = shl i32 %66, 6 > %70 = or i32 %69, 4 > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %70) > %72 = shl i32 %66, 6 > %73 = or i32 %72, 8 > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %73) > %75 = shl i32 %66, 6 > %76 = or i32 %75, 12 > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %76) > %78 = fmul float %26, %68 > %79 = fmul float %27, %71 > %80 = fadd float %78, %79 > %81 = fmul float %28, %74 > %82 = fadd float %80, %81 > %83 = fmul float %29, %77 > %84 = 
fadd float %82, %83 > %85 = shl i32 %66, 2 > %86 = and i32 %85, 268435452 > %87 = or i32 %86, 1 > %88 = shl nuw i32 %87, 4 > %89 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %88) > %90 = shl nuw i32 %87, 4 > %91 = or i32 %90, 4 > %92 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %91) > %93 = shl nuw i32 %87, 4 > %94 = or i32 %93, 8 > %95 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %94) > %96 = shl nuw i32 %87, 4 > %97 = or i32 %96, 12 > %98 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %97) > %99 = fmul float %26, %89 > %100 = fmul float %27, %92 > %101 = fadd float %99, %100 > %102 = fmul float %28, %95 > %103 = fadd float %101, %102 > %104 = fmul float %29, %98 > %105 = fadd float %103, %104 > %106 = shl i32 %66, 2 > %107 = and i32 %106, 268435452 > %108 = or i32 %107, 2 > %109 = shl nuw i32 %108, 4 > %110 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %109) > %111 = shl nuw i32 %108, 4 > %112 = or i32 %111, 4 > %113 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %112) > %114 = shl nuw i32 %108, 4 > %115 = or i32 %114, 8 > %116 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %115) > %117 = shl nuw i32 %108, 4 > %118 = or i32 %117, 12 > %119 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %118) > %120 = fmul float %26, %110 > %121 = fmul float %27, %113 > %122 = fadd float %120, %121 > %123 = fmul float %28, %116 > %124 = fadd float %122, %123 > %125 = fmul float %29, %119 > %126 = fadd float %124, %125 > %127 = shl i32 %66, 2 > %128 = and i32 %127, 268435452 > %129 = or i32 %128, 3 > %130 = shl nuw i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %130) > %132 = shl nuw i32 %129, 4 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %133) > %135 = shl nuw i32 %129, 4 > %136 = or i32 %135, 8 > %137 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %136) > %138 = shl nuw i32 %129, 4 > %139 = or i32 %138, 12 > %140 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %139) > %141 = fmul float %26, %131 > %142 = fmul float %27, %134 > %143 = fadd float %141, %142 > %144 = fmul float %28, %137 > %145 = fadd float %143, %144 > %146 = fmul float %29, %140 > %147 = fadd float %145, %146 > %148 = bitcast i32 %11 to float > %149 = insertvalue <{ float, float, float }> undef, float %148, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %43, float %45, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %56, float %58, float %60, float %62) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %84, float %105, float %126, float %147) > ret <{ float, float, float }> %149 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], 
GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0], LOCAL > 0: MOV TEMP[0].xy, IN[2].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: FMA TEMP[0], TEMP[0], IN[1], IN[0] > 3: MOV OUT[0], TEMP[0] > 4: END >radeonsi: Compiling shader 140 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = bitcast float %40 to i32 > %43 = bitcast float %41 to i32 > %44 = insertelement <2 x i32> undef, i32 %42, i32 0 > %45 = insertelement <2 x i32> %44, i32 %43, i32 1 > %46 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %45, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %47 = extractelement <4 x float> %46, i32 0 > %48 = extractelement <4 x float> %46, i32 1 > %49 = extractelement <4 x float> %46, i32 2 > %50 = extractelement <4 x float> %46, i32 3 > %51 = call float @llvm.fma.f32(float %47, float %36, float %32) > %52 = call float @llvm.fma.f32(float %48, float %37, float %33) > %53 = call float @llvm.fma.f32(float %49, float %38, float %34) > %54 = call float @llvm.fma.f32(float %50, float %39, float %35) > %55 = bitcast float %5 to i32 > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %55, 10 > %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %51, 11 > %58 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float }> %57, float %52, 12 > %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %58, float %53, 13 > %60 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %59, float %54, 14 > %61 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %60, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %61 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..4095] >DCL TEMP[0..5], LOCAL >DCL ADDR[0] >IMM[0] INT32 {2, 4, 30, 1} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: SHL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx > 1: U2F TEMP[0].x, TEMP[0].xxxx > 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 3: F2U TEMP[1].x, TEMP[0].xxxx > 4: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 5: USHR TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 6: UARL ADDR[0].x, TEMP[1].xxxx > 7: UARL ADDR[0].x, TEMP[1].xxxx > 8: MOV TEMP[1], CONST[1][ADDR[0].x] > 9: BFI TEMP[2].x, IMM[0].xxxx, IN[2].xxxx, IMM[0].xxxx, IMM[0].zzzz > 10: BFI TEMP[3].x, IMM[0].xxxx, IN[2].xxxx, IMM[0].xxxx, IMM[0].zzzz > 11: MOV TEMP[2].y, TEMP[3].xxxx > 12: BFI TEMP[3].x, IMM[0].wwww, IN[2].xxxx, IMM[0].xxxx, IMM[0].zzzz > 13: MOV TEMP[2].z, TEMP[3].xxxx > 14: U2F TEMP[0].xyz, TEMP[2].xyzz > 15: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xyxx > 16: F2U TEMP[0].xyz, TEMP[0].xyzz > 17: UMUL TEMP[2].x, TEMP[0].zzzz, IMM[2].yyyy > 18: USHR TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 19: UARL ADDR[0].x, TEMP[2].xxxx > 20: UARL ADDR[0].x, TEMP[2].xxxx > 21: MOV TEMP[2], CONST[1][ADDR[0].x] > 22: MOV TEMP[3].xy, IN[0].xyxx > 23: UMUL TEMP[4].x, TEMP[0].xxxx, IMM[2].yyyy > 24: USHR TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy > 25: UARL ADDR[0].x, TEMP[5].xxxx > 26: MOV TEMP[4], CONST[1][ADDR[0].x] > 27: DP4 TEMP[4].x, IN[1], TEMP[4] > 28: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 29: USHR TEMP[5].x, TEMP[0].xxxx, IMM[0].yyyy > 30: UARL ADDR[0].x, TEMP[5].xxxx > 31: MOV TEMP[0], CONST[1][ADDR[0].x] > 32: DP4 TEMP[0].x, IN[1], TEMP[0] > 33: MOV TEMP[4].y, TEMP[0].xxxx > 34: MOV TEMP[4].zw, IMM[1].wwzw > 35: MOV OUT[3], TEMP[3] > 36: MOV OUT[2], TEMP[2] > 37: MOV OUT[1], TEMP[1] > 38: MOV OUT[0], TEMP[4] > 39: END >radeonsi: Compiling shader 141 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 
x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %14) > %26 = extractelement <4 x float> %25, i32 0 > %27 = extractelement <4 x float> %25, i32 1 > %28 = extractelement <4 x float> %25, i32 2 > %29 = extractelement <4 x float> %25, i32 3 > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %15) > %33 = extractelement <4 x float> %32, i32 0 > %34 = bitcast float %33 to i32 > %35 = shl i32 %34, 2 > %36 = uitofp i32 %35 to float > %37 = fadd float %36, 0x3FB99999A0000000 > %38 = fptoui float %37 to i32 > %39 = shl i32 %38, 4 > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %39) > %41 = shl i32 %38, 4 > %42 = or i32 %41, 4 > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %42) > %44 = shl i32 %38, 4 > %45 = or i32 %44, 8 > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %45) > %47 = shl i32 %38, 4 > %48 = or i32 %47, 12 > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %48) > %50 = bitcast float %33 to i32 > %51 = shl i32 %50, 2 > %52 = or i32 %51, 2 > %53 = bitcast float %33 to i32 > %54 = shl i32 %53, 2 > %55 = or i32 %54, 2 > %56 = bitcast float %33 to i32 > %57 = shl i32 %56, 2 > %58 = or i32 %57, 1 > %59 = uitofp i32 %52 to float > %60 = uitofp i32 %55 to float > %61 = uitofp i32 %58 to float > %62 = fadd float %59, 0x3FB99999A0000000 > %63 = fadd float %60, 0x3FF19999A0000000 > %64 = fadd float %61, 0x3FB99999A0000000 > %65 = fptoui float %62 to i32 > %66 = fptoui float %63 to i32 > %67 = fptoui float %64 to i32 > %68 = shl i32 %67, 4 > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %68) > %70 = shl i32 %67, 4 > %71 = or i32 %70, 4 > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %71) > %73 = shl i32 %67, 4 > %74 = or i32 %73, 8 > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %74) > %76 = shl i32 %67, 4 > %77 = or i32 %76, 12 > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %77) > %79 = shl i32 %65, 4 > %80 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %79) > %81 = shl i32 %65, 4 > %82 = or i32 %81, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %82) > %84 = shl i32 %65, 4 > %85 = or i32 %84, 8 > %86 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %85) > %87 = shl i32 %65, 4 > %88 = or i32 %87, 12 > %89 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %88) > %90 = fmul float %26, %80 > %91 = fmul float %27, %83 > %92 = fadd float %90, %91 > %93 = fmul float 
%28, %86 > %94 = fadd float %92, %93 > %95 = fmul float %29, %89 > %96 = fadd float %94, %95 > %97 = shl i32 %66, 4 > %98 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %97) > %99 = shl i32 %66, 4 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %100) > %102 = shl i32 %66, 4 > %103 = or i32 %102, 8 > %104 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %103) > %105 = shl i32 %66, 4 > %106 = or i32 %105, 12 > %107 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %106) > %108 = fmul float %26, %98 > %109 = fmul float %27, %101 > %110 = fadd float %108, %109 > %111 = fmul float %28, %104 > %112 = fadd float %110, %111 > %113 = fmul float %29, %107 > %114 = fadd float %112, %113 > %115 = bitcast i32 %11 to float > %116 = insertvalue <{ float, float, float }> undef, float %115, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %40, float %43, float %46, float %49) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %69, float %72, float %75, float %78) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %114, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %116 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..3], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 1020.0400, 0.0000, 1.0000, 2.1000} >IMM[1] FLT32 { 0.1000, 1.1000, 2.1000, 3.1000} >IMM[2] UINT32 {0, 16, 0, 0} >IMM[3] INT32 {4, 0, 0, 0} > 0: FMA TEMP[0], IN[0].zzzz, IMM[0].xxxx, IMM[1] > 1: F2U TEMP[0], TEMP[0] > 2: UMUL TEMP[1].x, TEMP[0].zzzz, IMM[2].yyyy > 3: USHR TEMP[2].x, TEMP[1].xxxx, IMM[3].xxxx > 4: UARL ADDR[0].x, TEMP[2].xxxx > 5: MOV TEMP[1], CONST[1][ADDR[0].x] > 6: DP4 TEMP[1].x, IN[1], TEMP[1] > 7: UMUL TEMP[2].x, TEMP[0].wwww, IMM[2].yyyy > 8: USHR TEMP[3].x, TEMP[2].xxxx, IMM[3].xxxx > 9: UARL ADDR[0].x, TEMP[3].xxxx > 10: MOV TEMP[2], CONST[1][ADDR[0].x] > 11: DP4 TEMP[2].x, IN[1], TEMP[2] > 12: MOV TEMP[1].y, TEMP[2].xxxx > 13: UMUL TEMP[2].x, TEMP[0].xxxx, IMM[2].yyyy > 14: USHR TEMP[3].x, TEMP[2].xxxx, IMM[3].xxxx > 15: UARL ADDR[0].x, TEMP[3].xxxx > 16: MOV TEMP[2], CONST[1][ADDR[0].x] > 17: DP4 TEMP[2].x, IN[1], TEMP[2] > 18: UMUL TEMP[0].x, TEMP[0].yyyy, IMM[2].yyyy > 19: USHR TEMP[3].x, TEMP[0].xxxx, IMM[3].xxxx > 20: UARL ADDR[0].x, TEMP[3].xxxx > 21: MOV TEMP[0], CONST[1][ADDR[0].x] > 22: DP4 TEMP[0].x, IN[1], TEMP[0] > 23: MOV TEMP[2].y, TEMP[0].xxxx > 24: MOV TEMP[2].zw, IMM[0].zzyz > 25: MOV OUT[2], TEMP[1] > 26: MOV OUT[1], IN[0] > 27: MOV OUT[0], TEMP[2] > 28: END >radeonsi: Compiling shader 142 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] 
addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x3FB99999A0000000) > %32 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x3FF19999A0000000) > %33 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x4000CCCCC0000000) > %34 = call float @llvm.fma.f32(float %22, float 0x408FE051E0000000, float 0x4008CCCCC0000000) > %35 = fptoui float %31 to i32 > %36 = fptoui float %32 to i32 > %37 = fptoui float %33 to i32 > %38 = fptoui float %34 to i32 > %39 = shl i32 %37, 4 > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %39) > %41 = shl i32 %37, 4 > %42 = or i32 %41, 4 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = shl i32 %37, 4 > %45 = or i32 %44, 8 > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %45) > %47 = shl i32 %37, 4 > %48 = or i32 %47, 12 > %49 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %48) > %50 = fmul float %27, %40 > %51 = fmul float %28, %43 > %52 = fadd float %50, %51 > %53 = fmul float %29, %46 > %54 = fadd float %52, %53 > %55 = fmul float %30, %49 > %56 = fadd float %54, %55 > %57 = shl i32 %38, 4 > %58 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %57) > %59 = shl i32 %38, 4 > %60 = or i32 %59, 4 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = shl i32 %38, 4 > %63 = or i32 %62, 8 > %64 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %63) > %65 = shl i32 %38, 4 > %66 = or i32 %65, 12 > %67 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %66) > %68 = fmul float %27, %58 > %69 = fmul float %28, %61 > %70 = fadd float %68, %69 > %71 = fmul float %29, %64 > %72 = fadd float %70, %71 > %73 = fmul float %30, %67 > %74 = fadd float %72, %73 > %75 = shl i32 %35, 4 > %76 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %75) > %77 = shl i32 %35, 4 > %78 = or i32 %77, 4 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = shl i32 %35, 4 > %81 = or i32 %80, 8 > %82 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %81) > %83 = shl i32 %35, 4 > %84 = or i32 %83, 12 > %85 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %84) > %86 = fmul float %27, %76 > %87 = fmul float %28, %79 > %88 = fadd float %86, %87 > %89 = fmul float %29, %82 > %90 = fadd float %88, %89 > %91 = fmul float %30, %85 > %92 = fadd float %90, %91 > %93 = shl i32 %36, 4 > %94 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %93) > %95 = shl i32 %36, 4 > %96 = or i32 %95, 4 > %97 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %96) > %98 = shl i32 %36, 4 > %99 = or i32 %98, 8 > %100 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %99) > %101 = shl i32 %36, 4 > %102 = or i32 %101, 12 > %103 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %102) > %104 = fmul float %27, %94 > %105 = fmul float %28, %97 > %106 = fadd float %104, %105 > %107 = fmul float %29, %100 > %108 = fadd float %106, %107 > %109 = fmul float %30, %103 > %110 = fadd float %108, %109 > %111 = bitcast i32 %11 to float > %112 = insertvalue <{ float, float, float }> undef, float %111, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %56, float %74, float %46, float %49) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %92, float %110, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %112 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0100, 510.0200, 0.1000, 1.1000} >IMM[1] UINT32 {0, 16, 0, 0} >IMM[2] INT32 {96, 4, 2, 1} >IMM[3] INT32 {3, 0, 0, 0} > 0: FMA TEMP[0].xyz, IN[0].zzzz, IMM[0].xyyy, IMM[0].zzww > 1: F2U TEMP[0].xyz, TEMP[0].xyzz > 2: UADD TEMP[1].x, IMM[2].xxxx, TEMP[0].yyyy > 3: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy > 4: USHR TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 5: UARL ADDR[0].x, TEMP[2].xxxx > 6: MOV TEMP[1], CONST[1][ADDR[0].x] > 7: DP4 TEMP[1].x, IN[1], TEMP[1] > 8: UADD TEMP[2].x, IMM[2].xxxx, TEMP[0].zzzz > 9: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy > 10: USHR TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 11: UARL ADDR[0].x, TEMP[3].xxxx > 12: MOV TEMP[2], CONST[1][ADDR[0].x] > 13: DP4 TEMP[2].x, IN[1], TEMP[2] > 14: MOV TEMP[1].y, TEMP[2].xxxx > 15: SHL TEMP[0].x, TEMP[0].xxxx, IMM[2].zzzz > 16: UMUL TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy > 17: USHR TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 18: UARL ADDR[0].x, TEMP[3].xxxx > 19: MOV TEMP[2], CONST[1][ADDR[0].x] > 20: DP4 TEMP[2].x, IN[1], TEMP[2] > 21: UADD TEMP[3].x, IMM[2].wwww, TEMP[0].xxxx > 22: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy > 23: USHR TEMP[4].x, TEMP[3].xxxx, IMM[2].yyyy > 24: UARL ADDR[0].x, TEMP[4].xxxx > 25: MOV TEMP[3], CONST[1][ADDR[0].x] > 26: DP4 TEMP[3].x, IN[1], TEMP[3] > 27: MOV TEMP[2].y, TEMP[3].xxxx > 28: UADD TEMP[3].x, IMM[2].zzzz, TEMP[0].xxxx > 29: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy > 30: USHR TEMP[4].x, TEMP[3].xxxx, IMM[2].yyyy > 31: UARL ADDR[0].x, TEMP[4].xxxx > 32: MOV TEMP[3], CONST[1][ADDR[0].x] > 33: DP4 TEMP[3].x, IN[1], TEMP[3] > 34: MOV TEMP[2].z, TEMP[3].xxxx > 35: UADD TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx > 36: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy > 37: USHR TEMP[3].x, 
TEMP[0].xxxx, IMM[2].yyyy > 38: UARL ADDR[0].x, TEMP[3].xxxx > 39: MOV TEMP[0], CONST[1][ADDR[0].x] > 40: DP4 TEMP[0].x, IN[1], TEMP[0] > 41: MOV TEMP[2].w, TEMP[0].xxxx > 42: MOV OUT[2], TEMP[1] > 43: MOV OUT[1], IN[0] > 44: MOV OUT[0], TEMP[2] > 45: END >radeonsi: Compiling shader 143 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %13) > %20 = extractelement <4 x float> %19, i32 0 > %21 = extractelement <4 x float> %19, i32 1 > %22 = extractelement <4 x float> %19, i32 2 > %23 = extractelement <4 x float> %19, i32 3 > %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 > %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %14) > %27 = extractelement <4 x float> %26, i32 0 > %28 = extractelement <4 x float> %26, i32 1 > %29 = extractelement <4 x float> %26, i32 2 > %30 = extractelement <4 x float> %26, i32 3 > %31 = call float @llvm.fma.f32(float %22, float 0x406FE051E0000000, float 0x3FB99999A0000000) > %32 = call float @llvm.fma.f32(float %22, float 0x407FE051E0000000, float 0x3FB99999A0000000) > %33 = call float @llvm.fma.f32(float %22, float 0x407FE051E0000000, float 0x3FF19999A0000000) > %34 = fptoui float %31 to i32 > %35 = fptoui float %32 to i32 > %36 = fptoui float %33 to i32 > %37 = shl i32 %35, 4 > %38 = add i32 %37, 1536 > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %38) > %40 = add i32 %37, 1540 > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %40) > %42 = add i32 %37, 1544 > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %42) > %44 = add i32 %37, 1548 > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %44) > %46 = fmul float %27, %39 > %47 = fmul float %28, %41 > %48 = fadd float %46, %47 > %49 = fmul float %29, %43 > %50 = fadd float %48, %49 > %51 = fmul float %30, %45 > %52 = fadd float %50, %51 > %53 = shl i32 %36, 4 > %54 = add i32 %53, 1536 > %55 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %54) > %56 = add i32 %53, 1540 > %57 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %56) > %58 = add i32 %53, 1544 > %59 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %58) > %60 = add i32 %53, 1548 > %61 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %60) > %62 = fmul float %27, %55 > %63 = fmul float %28, %57 > %64 = fadd float %62, %63 > %65 = fmul float %29, %59 > %66 = fadd float %64, %65 > %67 = fmul float %30, %61 > %68 = fadd float %66, %67 > %69 = shl i32 %34, 6 > %70 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %69) > %71 = shl i32 %34, 6 > %72 = or i32 %71, 4 > %73 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %72) > %74 = shl i32 %34, 6 > %75 = or i32 %74, 8 > %76 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 %75) > %77 = shl i32 %34, 6 > %78 = or i32 %77, 12 > %79 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %78) > %80 = fmul float %27, %70 > %81 = fmul float %28, %73 > %82 = fadd float %80, %81 > %83 = fmul float %29, %76 > %84 = fadd float %82, %83 > %85 = fmul float %30, %79 > %86 = fadd float %84, %85 > %87 = shl i32 %34, 2 > %88 = and i32 %87, 268435452 > %89 = or i32 %88, 1 > %90 = shl nuw i32 %89, 4 > %91 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %90) > %92 = shl nuw i32 %89, 4 > %93 = or i32 %92, 4 > %94 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %93) > %95 = shl nuw i32 %89, 4 > %96 = or i32 %95, 8 > %97 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %96) > %98 = shl nuw i32 %89, 4 > %99 = or i32 %98, 12 > %100 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %99) > %101 = fmul float %27, %91 > %102 = fmul float %28, %94 > %103 = fadd float %101, %102 > %104 = fmul float %29, %97 > %105 = fadd float %103, %104 > %106 = fmul float %30, %100 > %107 = fadd float %105, %106 > %108 = shl i32 %34, 2 > %109 = and i32 %108, 268435452 > %110 = or i32 %109, 2 > %111 = shl nuw i32 %110, 4 > %112 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %111) > %113 = shl nuw i32 %110, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %114) > %116 = shl nuw i32 %110, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %117) > %119 = shl nuw i32 %110, 4 > %120 = or i32 %119, 12 > %121 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %120) > %122 = fmul float %27, %112 > %123 = fmul float %28, %115 > %124 = fadd float %122, %123 > %125 = fmul float %29, %118 > %126 = fadd float %124, %125 > %127 = fmul float %30, %121 > %128 = fadd float %126, %127 > %129 = shl i32 %34, 2 > %130 = and i32 %129, 268435452 > %131 = or i32 %130, 3 > %132 = shl nuw i32 %131, 4 > %133 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %132) > %134 = shl nuw i32 %131, 4 > %135 = or i32 %134, 4 > %136 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %135) > %137 = shl nuw i32 %131, 4 > %138 = or i32 %137, 8 > %139 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %138) > %140 = shl nuw i32 %131, 4 > %141 = or i32 %140, 12 > %142 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %141) > %143 = fmul float %27, %133 > %144 = fmul float %28, %136 > %145 = fadd float %143, %144 > %146 = fmul float %29, %139 > %147 = fadd float %145, %146 > %148 = fmul float %30, %142 > %149 = fadd float %147, %148 > %150 = bitcast i32 %11 to float > %151 = insertvalue <{ float, float, float }> undef, float %150, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %52, float %68, float %43, float %45) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %86, float %107, float %128, float %149) > ret <{ float, float, float }> %151 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..4095] >DCL TEMP[0..3], LOCAL >DCL ADDR[0] >IMM[0] INT32 {1, 4, 0, 0} >IMM[1] FLT32 { 0.1000, 1.1000, 0.0000, 1.0000} >IMM[2] UINT32 {0, 16, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: SHL TEMP[1].x, IN[2].xxxx, IMM[0].xxxx > 2: U2F TEMP[1].x, TEMP[1].xxxx > 3: ADD TEMP[1].xy, TEMP[1].xxxx, IMM[1].xyyy > 4: F2U TEMP[1].xy, TEMP[1].xyyy > 5: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 6: USHR TEMP[3].x, TEMP[2].xxxx, IMM[0].yyyy > 7: UARL ADDR[0].x, TEMP[3].xxxx > 8: MOV TEMP[2], CONST[1][ADDR[0].x] > 9: DP4 TEMP[2].x, IN[1], TEMP[2] > 10: UMUL TEMP[1].x, TEMP[1].yyyy, IMM[2].yyyy > 11: USHR TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy > 12: UARL ADDR[0].x, TEMP[3].xxxx > 13: MOV TEMP[1], CONST[1][ADDR[0].x] > 14: DP4 TEMP[1].x, IN[1], TEMP[1] > 15: MOV TEMP[2].y, TEMP[1].xxxx > 16: MOV TEMP[2].zw, IMM[1].wwzw > 17: MOV OUT[1], TEMP[0] > 18: MOV OUT[0], TEMP[2] > 19: END >radeonsi: Compiling shader 144 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %14) > %26 = extractelement <4 x float> %25, i32 0 > %27 = extractelement <4 x float> %25, i32 1 > %28 = extractelement <4 x float> %25, i32 2 > %29 = extractelement <4 x float> %25, i32 3 > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %15) > %bc = bitcast <4 x float> %32 to <4 x i32> > %33 = extractelement <4 x i32> %bc, i32 0 > %34 = shl i32 %33, 1 > %35 = uitofp i32 %34 to float > %36 = fadd float %35, 0x3FB99999A0000000 > %37 = fadd float %35, 0x3FF19999A0000000 > %38 = fptoui float %36 to i32 > %39 = fptoui float %37 to i32 > %40 = shl i32 %38, 4 > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %40) > %42 = shl i32 %38, 4 > %43 = or i32 %42, 4 > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %43) > %45 = shl i32 %38, 4 > %46 = or i32 %45, 8 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = shl i32 %38, 4 > %49 = or i32 %48, 12 > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %49) > %51 = fmul float %26, %41 > %52 = fmul float %27, %44 > %53 = fadd float %51, %52 > %54 = fmul float %28, %47 > %55 = fadd float %53, %54 > %56 = fmul 
float %29, %50 > %57 = fadd float %55, %56 > %58 = shl i32 %39, 4 > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %58) > %60 = shl i32 %39, 4 > %61 = or i32 %60, 4 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = shl i32 %39, 4 > %64 = or i32 %63, 8 > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %64) > %66 = shl i32 %39, 4 > %67 = or i32 %66, 12 > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %67) > %69 = fmul float %26, %59 > %70 = fmul float %27, %62 > %71 = fadd float %69, %70 > %72 = fmul float %28, %65 > %73 = fadd float %71, %72 > %74 = fmul float %29, %68 > %75 = fadd float %73, %74 > %76 = bitcast i32 %11 to float > %77 = insertvalue <{ float, float, float }> undef, float %76, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %75, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %77 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0], LOCAL > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MOV OUT[0], TEMP[0] > 3: END >radeonsi: Compiling shader 145 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = bitcast float %32 to i32 > %35 = bitcast float %33 to i32 > %36 = insertelement <2 x i32> undef, i32 %34, i32 0 > %37 = insertelement <2 x i32> %36, i32 %35, i32 1 > %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x 
i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %39 = extractelement <4 x float> %38, i32 0 > %40 = extractelement <4 x float> %38, i32 1 > %41 = extractelement <4 x float> %38, i32 2 > %42 = extractelement <4 x float> %38, i32 3 > %43 = bitcast float %5 to i32 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..4095] >DCL TEMP[0..4], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} >IMM[1] INT32 {2, 4, 1, 3} >IMM[2] UINT32 {0, 16, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: U2F TEMP[1].x, IN[2].xxxx > 2: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx > 3: F2U TEMP[1].x, TEMP[1].xxxx > 4: SHL TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 5: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy > 6: USHR TEMP[3].x, TEMP[2].xxxx, IMM[1].yyyy > 7: UARL ADDR[0].x, TEMP[3].xxxx > 8: MOV TEMP[2], CONST[1][ADDR[0].x] > 9: DP4 TEMP[2].x, IN[1], TEMP[2] > 10: UADD TEMP[3].x, IMM[1].zzzz, TEMP[1].xxxx > 11: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 12: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy > 13: UARL ADDR[0].x, TEMP[4].xxxx > 14: MOV TEMP[3], CONST[1][ADDR[0].x] > 15: DP4 TEMP[3].x, IN[1], TEMP[3] > 16: MOV TEMP[2].y, TEMP[3].xxxx > 17: UADD TEMP[3].x, IMM[1].xxxx, TEMP[1].xxxx > 18: UMUL TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 19: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy > 20: UARL ADDR[0].x, TEMP[4].xxxx > 21: MOV TEMP[3], CONST[1][ADDR[0].x] > 22: DP4 TEMP[3].x, IN[1], TEMP[3] > 23: MOV TEMP[2].z, TEMP[3].xxxx > 24: UADD TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx > 25: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[3].x, TEMP[1].xxxx, IMM[1].yyyy > 
27: UARL ADDR[0].x, TEMP[3].xxxx > 28: MOV TEMP[1], CONST[1][ADDR[0].x] > 29: DP4 TEMP[1].x, IN[1], TEMP[1] > 30: MOV TEMP[2].w, TEMP[1].xxxx > 31: MOV OUT[1], TEMP[0] > 32: MOV OUT[0], TEMP[2] > 33: END >radeonsi: Compiling shader 146 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %13) > %21 = extractelement <4 x float> %20, i32 0 > %22 = extractelement <4 x float> %20, i32 1 > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %14) > %26 = extractelement <4 x float> %25, i32 0 > %27 = extractelement <4 x float> %25, i32 1 > %28 = extractelement <4 x float> %25, i32 2 > %29 = extractelement <4 x float> %25, i32 3 > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %15) > %bc = bitcast <4 x float> %32 to <4 x i32> > %33 = extractelement <4 x i32> %bc, i32 0 > %34 = uitofp i32 %33 to float > %35 = fadd float %34, 0x3FB99999A0000000 > %36 = fptoui float %35 to i32 > %37 = shl i32 %36, 6 > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %37) > %39 = shl i32 %36, 6 > %40 = or i32 %39, 4 > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %40) > %42 = shl i32 %36, 6 > %43 = or i32 %42, 8 > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %43) > %45 = shl i32 %36, 6 > %46 = or i32 %45, 12 > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %46) > %48 = fmul float %26, %38 > %49 = fmul float %27, %41 > %50 = fadd float %48, %49 > %51 = fmul float %28, %44 > %52 = fadd float %50, %51 > %53 = fmul float %29, %47 > %54 = fadd float %52, %53 > %55 = shl i32 %36, 2 > %56 = and i32 %55, 268435452 > %57 = or i32 %56, 1 > %58 = shl nuw i32 %57, 4 > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %58) > %60 = shl nuw i32 %57, 4 > %61 = or i32 %60, 4 > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %61) > %63 = shl nuw i32 %57, 4 > %64 = or i32 %63, 8 > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %64) > %66 = shl nuw i32 %57, 4 > %67 = or i32 %66, 12 > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %67) > %69 = fmul float %26, %59 > %70 = fmul float %27, %62 > %71 = fadd float %69, %70 > %72 = fmul float %28, %65 > %73 = fadd float %71, %72 > %74 = fmul float %29, %68 > %75 = fadd float %73, %74 > %76 = shl i32 %36, 2 > %77 = and i32 %76, 268435452 > %78 = or i32 %77, 2 > %79 = shl nuw i32 %78, 4 > %80 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %79) > %81 = shl nuw i32 %78, 4 > %82 = 
or i32 %81, 4 > %83 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %82) > %84 = shl nuw i32 %78, 4 > %85 = or i32 %84, 8 > %86 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %85) > %87 = shl nuw i32 %78, 4 > %88 = or i32 %87, 12 > %89 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %88) > %90 = fmul float %26, %80 > %91 = fmul float %27, %83 > %92 = fadd float %90, %91 > %93 = fmul float %28, %86 > %94 = fadd float %92, %93 > %95 = fmul float %29, %89 > %96 = fadd float %94, %95 > %97 = shl i32 %36, 2 > %98 = and i32 %97, 268435452 > %99 = or i32 %98, 3 > %100 = shl nuw i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %100) > %102 = shl nuw i32 %99, 4 > %103 = or i32 %102, 4 > %104 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %103) > %105 = shl nuw i32 %99, 4 > %106 = or i32 %105, 8 > %107 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %106) > %108 = shl nuw i32 %99, 4 > %109 = or i32 %108, 12 > %110 = call float @llvm.SI.load.const(<16 x i8> %17, i32 %109) > %111 = fmul float %26, %101 > %112 = fmul float %27, %104 > %113 = fadd float %111, %112 > %114 = fmul float %28, %107 > %115 = fadd float %113, %114 > %116 = fmul float %29, %110 > %117 = fadd float %115, %116 > %118 = bitcast i32 %11 to float > %119 = insertvalue <{ float, float, float }> undef, float %118, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %54, float %75, float %96, float %117) > ret <{ float, float, float }> %119 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..7] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 64, 80} >IMM[1] UINT32 {96, 112, 32, 48} >IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[1], CONST[1][4] > 1: DP4 TEMP[1].x, IN[1], CONST[1][5] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][6] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[1], CONST[1][7] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: DP4 TEMP[1].x, IN[1], CONST[1][2] > 8: DP4 TEMP[2].x, IN[1], CONST[1][3] > 9: MOV TEMP[1].y, TEMP[2].xxxx > 10: MOV TEMP[1].zw, IMM[2].yyxy > 11: MOV OUT[4], TEMP[0] > 12: MOV OUT[3], CONST[1][1] > 13: MOV OUT[2], CONST[1][0] > 14: MOV OUT[1], IN[0] > 15: MOV OUT[0], TEMP[1] > 16: END >radeonsi: Compiling shader 147 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x 
i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 96) > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 100) > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 104) > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 108) > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %48 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %13) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = extractelement <4 x float> %58, i32 3 > %63 = fmul float %59, %33 > %64 = fmul float %60, %34 > %65 = fadd float %63, %64 > %66 = fmul float %61, %35 > %67 = fadd float %65, %66 > %68 = fmul float %62, %36 > %69 = fadd float %67, %68 > %70 = fmul float %59, %37 > %71 = fmul float %60, %38 > %72 = fadd float %70, %71 > %73 = fmul float %61, %39 > %74 = fadd float %72, %73 > %75 = fmul float %62, %40 > %76 = fadd float %74, %75 > %77 = fmul float %59, %41 > %78 = fmul float %60, %42 > %79 = fadd float %77, %78 > %80 = fmul float %61, %43 > %81 = fadd float %79, %80 > %82 = fmul float %62, %44 > %83 = fadd float %81, %82 > %84 = fmul float %59, %45 > %85 = fmul float %60, %46 > %86 = fadd float %84, 
%85 > %87 = fmul float %61, %47 > %88 = fadd float %86, %87 > %89 = fmul float %62, %48 > %90 = fadd float %88, %89 > %91 = fmul float %59, %25 > %92 = fmul float %60, %26 > %93 = fadd float %91, %92 > %94 = fmul float %61, %27 > %95 = fadd float %93, %94 > %96 = fmul float %62, %28 > %97 = fadd float %95, %96 > %98 = fmul float %59, %29 > %99 = fmul float %60, %30 > %100 = fadd float %98, %99 > %101 = fmul float %61, %31 > %102 = fadd float %100, %101 > %103 = fmul float %62, %32 > %104 = fadd float %102, %103 > %105 = bitcast i32 %11 to float > %106 = insertvalue <{ float, float, float }> undef, float %105, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float %54, float %55) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %69, float %76, float %83, float %90) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %104, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %106 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[3].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 2: MOV TEMP[1].xy, IN[3].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: ADD TEMP[1], TEMP[1], -TEMP[0] > 5: FMA TEMP[0], IN[0].xxxx, TEMP[1], TEMP[0] > 6: FMA TEMP[0], TEMP[0], IN[2], IN[1] > 7: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 8: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 9: MOV TEMP[0].w, TEMP[1].xxxx > 10: MOV OUT[0], TEMP[0] > 11: END >radeonsi: Compiling shader 148 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x 
i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 7 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %55 = bitcast float %53 to i32 > %56 = bitcast float %54 to i32 > %57 = insertelement <2 x i32> undef, i32 %55, i32 0 > %58 = insertelement <2 x i32> %57, i32 %56, i32 1 > %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %60 = extractelement <4 x float> %59, i32 0 > %61 = extractelement <4 x float> %59, i32 1 > %62 = extractelement <4 x float> %59, i32 2 > %63 = extractelement <4 x float> %59, i32 3 > %64 = bitcast float %51 to i32 > %65 = bitcast float %52 to i32 > %66 = insertelement <2 x i32> undef, i32 %64, i32 0 > %67 = insertelement <2 x i32> %66, i32 %65, i32 1 > %68 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %67, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = extractelement <4 x float> %68, i32 2 > %72 = extractelement <4 x float> %68, i32 3 > %73 = fsub float %69, %60 > %74 = fsub float %70, %61 > %75 = fsub float %71, %62 > %76 = fsub float %72, %63 > %77 = call float @llvm.fma.f32(float %41, float %73, float %60) > %78 = call float @llvm.fma.f32(float %41, float %74, float %61) > %79 = call float @llvm.fma.f32(float %41, float %75, float %62) > %80 = call float @llvm.fma.f32(float %41, float %76, float %63) > %81 = call float @llvm.fma.f32(float %77, float %47, float %43) > %82 = call float @llvm.fma.f32(float %78, float %48, float %44) > %83 = call float @llvm.fma.f32(float %79, float %49, float %45) > %84 = call float @llvm.fma.f32(float %80, float %50, float %46) > %85 = fmul 
float %84, %42 > %86 = fmul float %85, %81 > %87 = fmul float %85, %82 > %88 = fmul float %85, %83 > %89 = bitcast float %5 to i32 > %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %89, 10 > %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %86, 11 > %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %87, 12 > %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %88, 13 > %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %85, 14 > %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 96, 112} >IMM[1] UINT32 {128, 144, 32, 48} >IMM[2] UINT32 {64, 80, 0, 0} > 0: DP4 TEMP[0].x, IN[1], CONST[1][6] > 1: DP4 TEMP[1].x, IN[1], CONST[1][7] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][8] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[1], CONST[1][9] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: DP4 TEMP[1].x, IN[1], CONST[1][2] > 8: DP4 TEMP[2].x, IN[1], CONST[1][3] > 9: MOV TEMP[1].y, TEMP[2].xxxx > 10: DP4 TEMP[2].x, IN[1], CONST[1][4] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP4 TEMP[2].x, IN[1], CONST[1][5] > 13: MOV TEMP[1].w, TEMP[2].xxxx > 14: MOV OUT[4], TEMP[0] > 15: MOV OUT[3], CONST[1][1] > 16: MOV OUT[2], CONST[1][0] > 17: MOV OUT[1], IN[0] > 18: MOV OUT[0], TEMP[1] > 19: END >radeonsi: Compiling shader 149 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > 
%15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 96) > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 100) > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 104) > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 108) > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %48 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %49 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %50 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %51 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %52 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %53 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %54 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %55 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %56 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 > %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %13) > %60 = extractelement <4 x float> %59, i32 0 > %61 = extractelement <4 x float> %59, i32 1 > %62 = extractelement <4 x float> %59, i32 2 > %63 = extractelement <4 x float> %59, i32 3 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %14) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %67, %41 > %72 = fmul float %68, %42 > 
%73 = fadd float %71, %72 > %74 = fmul float %69, %43 > %75 = fadd float %73, %74 > %76 = fmul float %70, %44 > %77 = fadd float %75, %76 > %78 = fmul float %67, %45 > %79 = fmul float %68, %46 > %80 = fadd float %78, %79 > %81 = fmul float %69, %47 > %82 = fadd float %80, %81 > %83 = fmul float %70, %48 > %84 = fadd float %82, %83 > %85 = fmul float %67, %49 > %86 = fmul float %68, %50 > %87 = fadd float %85, %86 > %88 = fmul float %69, %51 > %89 = fadd float %87, %88 > %90 = fmul float %70, %52 > %91 = fadd float %89, %90 > %92 = fmul float %67, %53 > %93 = fmul float %68, %54 > %94 = fadd float %92, %93 > %95 = fmul float %69, %55 > %96 = fadd float %94, %95 > %97 = fmul float %70, %56 > %98 = fadd float %96, %97 > %99 = fmul float %67, %25 > %100 = fmul float %68, %26 > %101 = fadd float %99, %100 > %102 = fmul float %69, %27 > %103 = fadd float %101, %102 > %104 = fmul float %70, %28 > %105 = fadd float %103, %104 > %106 = fmul float %67, %29 > %107 = fmul float %68, %30 > %108 = fadd float %106, %107 > %109 = fmul float %69, %31 > %110 = fadd float %108, %109 > %111 = fmul float %70, %32 > %112 = fadd float %110, %111 > %113 = fmul float %67, %33 > %114 = fmul float %68, %34 > %115 = fadd float %113, %114 > %116 = fmul float %69, %35 > %117 = fadd float %115, %116 > %118 = fmul float %70, %36 > %119 = fadd float %117, %118 > %120 = fmul float %67, %37 > %121 = fmul float %68, %38 > %122 = fadd float %120, %121 > %123 = fmul float %69, %39 > %124 = fadd float %122, %123 > %125 = fmul float %70, %40 > %126 = fadd float %124, %125 > %127 = bitcast i32 %11 to float > %128 = insertvalue <{ float, float, float }> undef, float %127, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %60, float %61, float %62, float %63) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %77, float %84, float %91, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %105, float %112, float %119, float %126) > ret <{ float, float, float }> %128 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..5] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 32, 48, 64} >IMM[1] UINT32 {80, 16, 0, 0} >IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[1], CONST[1][2] > 1: DP4 TEMP[1].x, IN[1], CONST[1][3] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][4] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[1], CONST[1][5] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: DP4 TEMP[1].x, IN[1], CONST[1][0] > 8: DP4 TEMP[2].x, IN[1], CONST[1][1] > 9: MOV TEMP[1].y, TEMP[2].xxxx > 10: MOV TEMP[1].zw, IMM[2].yyxy > 11: MOV OUT[2], TEMP[0] > 12: MOV OUT[1], IN[0] > 13: MOV OUT[0], TEMP[1] > 14: END >radeonsi: Compiling shader 150 >TGSI shader LLVM IR: 
> >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %14) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %51, %25 > %56 = fmul float %52, %26 > %57 = fadd float %55, %56 > %58 = fmul float %53, %27 > %59 = fadd float %57, %58 > %60 = fmul float %54, %28 > %61 = fadd float %59, %60 > %62 = fmul float %51, %29 > %63 = fmul float %52, %30 > %64 = fadd float %62, %63 > %65 = fmul float %53, %31 > %66 = fadd float %64, %65 > %67 = fmul float %54, %32 > %68 = fadd float %66, %67 > %69 = fmul float %51, %33 > %70 = fmul float %52, %34 > %71 = fadd float %69, %70 > %72 = fmul float %53, %35 > %73 = fadd float %71, %72 > %74 = fmul float %54, %36 > %75 = fadd float %73, %74 > %76 = fmul float %51, %37 > %77 = fmul 
float %52, %38 > %78 = fadd float %76, %77 > %79 = fmul float %53, %39 > %80 = fadd float %78, %79 > %81 = fmul float %54, %40 > %82 = fadd float %80, %81 > %83 = fmul float %51, %17 > %84 = fmul float %52, %18 > %85 = fadd float %83, %84 > %86 = fmul float %53, %19 > %87 = fadd float %85, %86 > %88 = fmul float %54, %20 > %89 = fadd float %87, %88 > %90 = fmul float %51, %21 > %91 = fmul float %52, %22 > %92 = fadd float %90, %91 > %93 = fmul float %53, %23 > %94 = fadd float %92, %93 > %95 = fmul float %54, %24 > %96 = fadd float %94, %95 > %97 = bitcast i32 %11 to float > %98 = insertvalue <{ float, float, float }> undef, float %97, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %61, float %68, float %75, float %82) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %89, float %96, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %98 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL TEMP[0..1], LOCAL > 0: MOV TEMP[0].xy, IN[1].zwww > 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 2: MOV TEMP[1].xy, IN[1].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 4: ADD TEMP[1], TEMP[1], -TEMP[0] > 5: FMA TEMP[0], IN[0].xxxx, TEMP[1], TEMP[0] > 6: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww > 7: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 8: MOV TEMP[0].w, TEMP[1].xxxx > 9: MOV OUT[0], TEMP[0] > 10: END >radeonsi: Compiling shader 151 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x 
i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 7 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %47 = bitcast float %45 to i32 > %48 = bitcast float %46 to i32 > %49 = insertelement <2 x i32> undef, i32 %47, i32 0 > %50 = insertelement <2 x i32> %49, i32 %48, i32 1 > %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = bitcast float %43 to i32 > %57 = bitcast float %44 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fsub float %61, %52 > %66 = fsub float %62, %53 > %67 = fsub float %63, %54 > %68 = fsub float %64, %55 > %69 = call float @llvm.fma.f32(float %41, float %65, float %52) > %70 = call float @llvm.fma.f32(float %41, float %66, float %53) > %71 = call float @llvm.fma.f32(float %41, float %67, float %54) > %72 = call float @llvm.fma.f32(float %41, float %68, float %55) > %73 = fmul float %72, %42 > %74 = fmul float %73, %69 > %75 = fmul float %73, %70 > %76 = fmul float %73, %71 > %77 = bitcast float %5 to i32 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %77, 10 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %74, 11 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %75, 12 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %76, 13 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %73, 14 > %83 = insertvalue <{ i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..7] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 64, 80, 96} >IMM[1] UINT32 {112, 16, 32, 48} > 0: DP4 TEMP[0].x, IN[1], CONST[1][4] > 1: DP4 TEMP[1].x, IN[1], CONST[1][5] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][6] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[1], CONST[1][7] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: DP4 TEMP[1].x, IN[1], CONST[1][0] > 8: DP4 TEMP[2].x, IN[1], CONST[1][1] > 9: MOV TEMP[1].y, TEMP[2].xxxx > 10: DP4 TEMP[2].x, IN[1], CONST[1][2] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP4 TEMP[2].x, IN[1], CONST[1][3] > 13: MOV TEMP[1].w, TEMP[2].xxxx > 14: MOV OUT[2], TEMP[0] > 15: MOV OUT[1], IN[0] > 16: MOV OUT[0], TEMP[1] > 17: END >radeonsi: Compiling shader 152 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 96) > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 100) > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 104) > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 108) > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %48 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %13) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = extractelement <4 x float> %58, i32 3 > %63 = fmul float %59, %33 > %64 = fmul float %60, %34 > %65 = fadd float %63, %64 > %66 = fmul float %61, %35 > %67 = fadd float %65, %66 > %68 = fmul float %62, %36 > %69 = fadd float %67, %68 > %70 = fmul float %59, %37 > %71 = fmul float %60, %38 > %72 = fadd float %70, %71 > %73 = fmul float %61, %39 > %74 = fadd float %72, %73 > %75 = fmul float %62, %40 > %76 = fadd float %74, %75 > %77 = fmul float %59, %41 > %78 = fmul float %60, %42 > %79 = fadd float %77, %78 > %80 = fmul float %61, %43 > %81 = fadd float %79, %80 > %82 = fmul float %62, %44 > %83 = fadd float %81, %82 > %84 = fmul float %59, %45 > %85 = fmul float %60, %46 > %86 = fadd float %84, %85 > %87 = fmul float %61, %47 > %88 = fadd float %86, %87 > %89 = fmul float %62, %48 > %90 = fadd float %88, %89 > %91 = fmul float %59, %17 > %92 = fmul float %60, %18 > %93 = fadd float %91, %92 > %94 = fmul float %61, %19 > %95 = fadd float %93, %94 > %96 = fmul float %62, %20 > %97 = fadd float %95, %96 > %98 = fmul float %59, %21 > %99 = fmul float %60, %22 > %100 = fadd float %98, %99 > %101 = fmul float %61, %23 > %102 = fadd float %100, %101 > %103 = fmul float %62, %24 > %104 = fadd float %102, %103 > %105 = fmul float %59, %25 > %106 = fmul float %60, %26 > %107 = fadd float %105, %106 > %108 = fmul float %61, %27 > %109 = fadd float %107, %108 > %110 = fmul float %62, %28 > %111 = fadd float %109, %110 > %112 = fmul float %59, %29 > %113 = fmul float %60, %30 > %114 = fadd float %112, %113 > %115 = fmul float %61, %31 > %116 = fadd float %114, %115 > %117 = fmul float %62, %32 > %118 = fadd float %116, %117 > %119 = bitcast i32 %11 to float > %120 = insertvalue <{ float, float, float }> undef, float %119, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float %54, float %55) > call void @llvm.SI.export(i32 15, 
i32 0, i32 0, i32 33, i32 0, float %69, float %76, float %83, float %90) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %104, float %111, float %118) > ret <{ float, float, float }> %120 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..28] >DCL CONST[2][0..4095] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 352} >IMM[3] UINT32 {240, 320, 336, 368} >IMM[4] UINT32 {448, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[6].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[5].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[5].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[5].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[5].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[6].x, 
TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[5].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[5].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[5].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[5].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[5].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL TEMP[11].x, TEMP[1].yyyy, 
IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[5].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[5].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[5].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[5].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[5].zzzz >205: MUL TEMP[4].x, IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >211: USHR 
TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[5].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[3].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[3].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[5].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[5].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[5].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[5].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[5].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[5].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[5].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[5].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[5].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[5].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[5].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, IN[5].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >292: 
UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[5].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[5].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[5].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[5].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[5].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[5].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][22], TEMP[3] >356: ADD TEMP[1].x, TEMP[1].xxxx, CONST[1][15].yyyy >357: MOV TEMP[0].z, TEMP[1].xxxx >358: DP4 TEMP[0].x, CONST[1][20], TEMP[3] >359: DP4 TEMP[1].x, CONST[1][21], TEMP[3] >360: MOV TEMP[0].y, TEMP[1].xxxx >361: DP4 TEMP[1].x, CONST[1][23], TEMP[3] >362: MOV TEMP[0].w, TEMP[1].xxxx >363: MOV TEMP[0], TEMP[0] >364: MOV OUT[1], IN[4] >365: MOV OUT[0], TEMP[0] >366: END >radeonsi: Compiling shader 153 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 
inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !0 > %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244) > %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 320) > %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 324) > %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 328) > %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 332) > %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 336) > %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 340) > %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 344) > %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 348) > %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 352) > %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 356) > %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 360) > %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 364) > %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 368) > %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 372) > %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 376) > %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 380) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %17) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = extractelement <4 x float> %49, i32 3 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %18) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 > %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %19) > %63 = extractelement <4 x float> %62, i32 0 > %64 = extractelement <4 x float> %62, i32 1 > %65 = extractelement <4 x float> %62, i32 2 > %66 = fmul float %65, 0x406FE01000000000 > %67 = fmul float %64, 0x406FE01000000000 > %68 = fmul float %63, 0x406FE01000000000 > %69 = fptosi float %66 to i32 > %70 = fptosi float %67 to i32 > %71 = fptosi float %68 to i32 > %72 = shl i32 %69, 1 > %73 = or i32 %72, 1 > %74 = shl i32 %70, 1 > %75 = or i32 %74, 1 > %76 = shl i32 %71, 1 > %77 = or i32 %76, 1 > %78 = shl i32 %69, 5 > %79 = or i32 %78, 4 > %80 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %79) > %81 = fmul float %57, %80 > %82 = shl i32 %70, 5 > %83 = or i32 
%82, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %83) > %85 = fmul float %58, %84 > %86 = shl i32 %73, 4 > %87 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %86) > %88 = shl i32 %73, 4 > %89 = or i32 %88, 12 > %90 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %89) > %91 = fmul float %87, %90 > %92 = shl i32 %73, 4 > %93 = or i32 %92, 4 > %94 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %93) > %95 = shl i32 %73, 4 > %96 = or i32 %95, 8 > %97 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %96) > %98 = fsub float -0.000000e+00, %91 > %99 = call float @llvm.fma.f32(float %94, float %97, float %98) > %100 = shl i32 %73, 4 > %101 = or i32 %100, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %101) > %103 = shl i32 %73, 4 > %104 = or i32 %103, 8 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = call float @llvm.fma.f32(float %102, float %105, float %91) > %107 = fmul float %106, %57 > %108 = fmul float %99, %57 > %109 = fmul float %108, 2.000000e+00 > %110 = shl i32 %75, 4 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = shl i32 %75, 4 > %113 = or i32 %112, 12 > %114 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %113) > %115 = fmul float %111, %114 > %116 = shl i32 %75, 4 > %117 = or i32 %116, 4 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = shl i32 %75, 4 > %120 = or i32 %119, 8 > %121 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %120) > %122 = fsub float -0.000000e+00, %115 > %123 = call float @llvm.fma.f32(float %118, float %121, float %122) > %124 = shl i32 %75, 4 > %125 = or i32 %124, 4 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = shl i32 %75, 4 > %128 = or i32 %127, 8 > %129 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %128) > %130 = call float @llvm.fma.f32(float %126, float %129, float %115) > %131 = fmul float %130, %58 > %132 = fmul float %131, 2.000000e+00 > %133 = fmul float %123, %58 > %134 = fmul float %133, 2.000000e+00 > %135 = shl i32 %73, 4 > %136 = or i32 %135, 4 > %137 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %136) > %138 = shl i32 %73, 4 > %139 = or i32 %138, 8 > %140 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %139) > %141 = shl i32 %73, 4 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = shl i32 %73, 4 > %144 = or i32 %143, 12 > %145 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %144) > %146 = fmul float %140, %145 > %147 = fmul float %140, %142 > %148 = fmul float %137, %145 > %149 = shl i32 %73, 4 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = shl i32 %73, 4 > %152 = or i32 %151, 4 > %153 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %152) > %154 = call float @llvm.fma.f32(float %150, float %153, float %146) > %155 = fmul float %154, %57 > %156 = fmul float %155, 2.000000e+00 > %157 = shl i32 %73, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %73, 4 > %160 = or i32 %159, 4 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %73, 4 > %163 = or i32 %162, 8 > %164 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %163) > %165 = shl i32 %73, 4 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = shl i32 %73, 4 > %168 = or i32 %167, 4 > %169 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %168) > %170 = shl i32 %73, 4 > %171 = or i32 %170, 8 > %172 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %171) > %173 = fmul float 
%158, %166 > %174 = fmul float %161, %169 > %175 = fmul float %164, %172 > %176 = fadd float %175, %174 > %177 = fadd float %175, %173 > %178 = fadd float %174, %173 > %179 = fsub float -0.000000e+00, %176 > %180 = call float @llvm.fma.f32(float %179, float 2.000000e+00, float 1.000000e+00) > %181 = fsub float -0.000000e+00, %177 > %182 = call float @llvm.fma.f32(float %181, float 2.000000e+00, float 1.000000e+00) > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float 2.000000e+00, float 1.000000e+00) > %185 = fmul float %57, %182 > %186 = shl i32 %75, 4 > %187 = or i32 %186, 4 > %188 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %187) > %189 = shl i32 %75, 4 > %190 = or i32 %189, 8 > %191 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %190) > %192 = shl i32 %75, 4 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = shl i32 %75, 4 > %195 = or i32 %194, 12 > %196 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %195) > %197 = fmul float %191, %196 > %198 = fmul float %191, %193 > %199 = fmul float %188, %196 > %200 = shl i32 %75, 4 > %201 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %200) > %202 = shl i32 %75, 4 > %203 = or i32 %202, 4 > %204 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %203) > %205 = call float @llvm.fma.f32(float %201, float %204, float %197) > %206 = fmul float %205, %58 > %207 = fmul float %206, 2.000000e+00 > %208 = shl i32 %75, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %75, 4 > %211 = or i32 %210, 4 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %75, 4 > %214 = or i32 %213, 8 > %215 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %214) > %216 = shl i32 %75, 4 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = shl i32 %75, 4 > %219 = or i32 %218, 4 > %220 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %219) > %221 = shl i32 %75, 4 > %222 = or i32 %221, 8 > %223 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %222) > %224 = fmul float %209, %217 > %225 = fmul float %212, %220 > %226 = fmul float %215, %223 > %227 = fadd float %226, %225 > %228 = fadd float %226, %224 > %229 = fadd float %225, %224 > %230 = fsub float -0.000000e+00, %227 > %231 = call float @llvm.fma.f32(float %230, float 2.000000e+00, float 1.000000e+00) > %232 = fsub float -0.000000e+00, %228 > %233 = call float @llvm.fma.f32(float %232, float 2.000000e+00, float 1.000000e+00) > %234 = fsub float -0.000000e+00, %229 > %235 = call float @llvm.fma.f32(float %234, float 2.000000e+00, float 1.000000e+00) > %236 = fmul float %58, %233 > %237 = fadd float %156, %207 > %238 = fadd float %185, %236 > %239 = fadd float %109, %134 > %240 = fadd float %81, %85 > %241 = shl i32 %71, 5 > %242 = or i32 %241, 4 > %243 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %242) > %244 = fmul float %59, %243 > %245 = shl i32 %77, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %245) > %247 = shl i32 %77, 4 > %248 = or i32 %247, 12 > %249 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %248) > %250 = fmul float %246, %249 > %251 = shl i32 %77, 4 > %252 = or i32 %251, 4 > %253 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %252) > %254 = shl i32 %77, 4 > %255 = or i32 %254, 8 > %256 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %255) > %257 = fsub float -0.000000e+00, %250 > %258 = call float @llvm.fma.f32(float %253, float %256, float %257) > %259 = shl i32 %77, 4 > %260 = or i32 %259, 4 > %261 = 
call float @llvm.SI.load.const(<16 x i8> %40, i32 %260) > %262 = shl i32 %77, 4 > %263 = or i32 %262, 8 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = call float @llvm.fma.f32(float %261, float %264, float %250) > %266 = fmul float %265, %59 > %267 = fmul float %266, 2.000000e+00 > %268 = fmul float %258, %59 > %269 = fmul float %268, 2.000000e+00 > %270 = shl i32 %77, 4 > %271 = or i32 %270, 4 > %272 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %271) > %273 = shl i32 %77, 4 > %274 = or i32 %273, 8 > %275 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %274) > %276 = shl i32 %77, 4 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = shl i32 %77, 4 > %279 = or i32 %278, 12 > %280 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %279) > %281 = fmul float %275, %280 > %282 = fmul float %275, %277 > %283 = fmul float %272, %280 > %284 = shl i32 %77, 4 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = shl i32 %77, 4 > %287 = or i32 %286, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %287) > %289 = call float @llvm.fma.f32(float %285, float %288, float %281) > %290 = fmul float %289, %59 > %291 = fmul float %290, 2.000000e+00 > %292 = shl i32 %77, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %77, 4 > %295 = or i32 %294, 4 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %77, 4 > %298 = or i32 %297, 8 > %299 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %298) > %300 = shl i32 %77, 4 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = shl i32 %77, 4 > %303 = or i32 %302, 4 > %304 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %303) > %305 = shl i32 %77, 4 > %306 = or i32 %305, 8 > %307 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %306) > %308 = fmul float %293, %301 > %309 = fmul float %296, %304 > %310 = fmul float %299, %307 > %311 = fadd float %310, %309 > %312 = fadd float %310, %308 > %313 = fadd float %309, %308 > %314 = fsub float -0.000000e+00, %311 > %315 = call float @llvm.fma.f32(float %314, float 2.000000e+00, float 1.000000e+00) > %316 = fsub float -0.000000e+00, %312 > %317 = call float @llvm.fma.f32(float %316, float 2.000000e+00, float 1.000000e+00) > %318 = fsub float -0.000000e+00, %313 > %319 = call float @llvm.fma.f32(float %318, float 2.000000e+00, float 1.000000e+00) > %320 = fmul float %59, %317 > %321 = fadd float %237, %291 > %322 = fadd float %238, %320 > %323 = fadd float %239, %269 > %324 = fadd float %240, %244 > %325 = fmul float %321, %44 > %326 = fmul float %322, %45 > %327 = fadd float %325, %326 > %328 = fmul float %323, %46 > %329 = fadd float %327, %328 > %330 = fadd float %329, %324 > %331 = shl i32 %73, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %331) > %333 = shl i32 %73, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %334) > %336 = fsub float -0.000000e+00, %148 > %337 = call float @llvm.fma.f32(float %332, float %335, float %336) > %338 = fmul float %337, %57 > %339 = fmul float %338, 2.000000e+00 > %340 = fmul float %107, 2.000000e+00 > %341 = shl i32 %75, 4 > %342 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %341) > %343 = shl i32 %75, 4 > %344 = or i32 %343, 8 > %345 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %344) > %346 = fsub float -0.000000e+00, %199 > %347 = call float @llvm.fma.f32(float %342, float %345, float %346) > %348 = fmul float %347, %58 > %349 = fmul float 
%348, 2.000000e+00 > %350 = fmul float %57, %184 > %351 = fmul float %57, %180 > %352 = fmul float %58, %235 > %353 = fmul float %58, %231 > %354 = shl i32 %69, 5 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fmul float %57, %356 > %358 = shl i32 %70, 5 > %359 = or i32 %358, 8 > %360 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %359) > %361 = fmul float %58, %360 > %362 = fadd float %349, %339 > %363 = fadd float %132, %340 > %364 = fadd float %352, %350 > %365 = fadd float %361, %357 > %366 = shl i32 %77, 4 > %367 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %366) > %368 = shl i32 %77, 4 > %369 = or i32 %368, 8 > %370 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %369) > %371 = fsub float -0.000000e+00, %283 > %372 = call float @llvm.fma.f32(float %367, float %370, float %371) > %373 = fmul float %372, %59 > %374 = fmul float %373, 2.000000e+00 > %375 = fmul float %59, %319 > %376 = fmul float %59, %315 > %377 = shl i32 %71, 5 > %378 = or i32 %377, 8 > %379 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %378) > %380 = fmul float %59, %379 > %381 = fadd float %362, %374 > %382 = fadd float %363, %267 > %383 = fadd float %364, %375 > %384 = fadd float %365, %380 > %385 = fmul float %381, %44 > %386 = fmul float %382, %45 > %387 = fadd float %385, %386 > %388 = fmul float %383, %46 > %389 = fadd float %387, %388 > %390 = fadd float %389, %384 > %391 = shl i32 %69, 5 > %392 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %391) > %393 = fmul float %57, %392 > %394 = shl i32 %70, 5 > %395 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %394) > %396 = fmul float %58, %395 > %397 = shl i32 %71, 5 > %398 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %397) > %399 = fmul float %59, %398 > %400 = shl i32 %73, 4 > %401 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %400) > %402 = shl i32 %73, 4 > %403 = or i32 %402, 4 > %404 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %403) > %405 = fsub float -0.000000e+00, %146 > %406 = call float @llvm.fma.f32(float %401, float %404, float %405) > %407 = fadd float %148, %147 > %408 = fmul float %406, %57 > %409 = fmul float %407, %57 > %410 = fmul float %408, 2.000000e+00 > %411 = fmul float %409, 2.000000e+00 > %412 = shl i32 %75, 4 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = shl i32 %75, 4 > %415 = or i32 %414, 4 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fsub float -0.000000e+00, %197 > %418 = call float @llvm.fma.f32(float %413, float %416, float %417) > %419 = shl i32 %77, 4 > %420 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %419) > %421 = shl i32 %77, 4 > %422 = or i32 %421, 4 > %423 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %422) > %424 = fsub float -0.000000e+00, %281 > %425 = call float @llvm.fma.f32(float %420, float %423, float %424) > %426 = fadd float %283, %282 > %427 = fmul float %418, %58 > %428 = fmul float %425, %59 > %429 = fmul float %426, %59 > %430 = fmul float %428, 2.000000e+00 > %431 = fmul float %429, 2.000000e+00 > %432 = fadd float %199, %198 > %433 = fmul float %432, %58 > %434 = fmul float %427, 2.000000e+00 > %435 = fmul float %433, 2.000000e+00 > %436 = fadd float %351, %353 > %437 = fadd float %410, %434 > %438 = fadd float %411, %435 > %439 = fadd float %393, %396 > %440 = fadd float %376, %436 > %441 = fadd float %430, %437 > %442 = fadd float %431, %438 > %443 = fadd float %399, %439 > %444 = fmul float %440, %44 > %445 = fmul float %441, %45 > %446 
= fadd float %444, %445 > %447 = fmul float %442, %46 > %448 = fadd float %446, %447 > %449 = fadd float %448, %443 > %450 = fmul float %31, %449 > %451 = fmul float %32, %330 > %452 = fadd float %450, %451 > %453 = fmul float %33, %390 > %454 = fadd float %452, %453 > %455 = fadd float %454, %34 > %456 = fadd float %455, %22 > %457 = fmul float %23, %449 > %458 = fmul float %24, %330 > %459 = fadd float %457, %458 > %460 = fmul float %25, %390 > %461 = fadd float %459, %460 > %462 = fadd float %461, %26 > %463 = fmul float %27, %449 > %464 = fmul float %28, %330 > %465 = fadd float %463, %464 > %466 = fmul float %29, %390 > %467 = fadd float %465, %466 > %468 = fadd float %467, %30 > %469 = fmul float %35, %449 > %470 = fmul float %36, %330 > %471 = fadd float %469, %470 > %472 = fmul float %37, %390 > %473 = fadd float %471, %472 > %474 = fadd float %473, %38 > %475 = bitcast i32 %11 to float > %476 = insertvalue <{ float, float, float }> undef, float %475, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %50, float %51, float %52, float %53) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %462, float %468, float %456, float %474) > ret <{ float, float, float }> %476 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL CONST[1][0..15] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 240, 0, 0} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 158456325028528675187087900672.0000} >IMM[2] FLT32 { -2.0000, 3.0000, -0.1000, 0.0000} >IMM[3] INT32 {1, 0, 0, 0} > 0: ADD TEMP[0].x, CONST[1][15].wwww, IMM[1].xxxx > 1: FMA TEMP[1].x, TEMP[0].xxxx, CONST[1][15].zzzz, IN[0].wwww > 2: ADD TEMP[0].x, TEMP[1].xxxx, IMM[1].yyyy > 3: FSNE TEMP[1].x, CONST[1][15].wwww, IMM[1].zzzz > 4: UIF TEMP[1].xxxx :0 > 5: RCP TEMP[1].x, CONST[1][15].wwww > 6: ELSE :0 > 7: MOV TEMP[1].x, IMM[1].wwww > 8: ENDIF > 9: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx > 10: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 11: FMA TEMP[2].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy > 12: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx > 13: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[0].xxxx, IMM[2].zzzz > 14: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz > 15: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx > 16: INEG TEMP[1].x, TEMP[1].xxxx > 17: USNE TEMP[0].x, TEMP[1].xxxx, IMM[0].xxxx > 18: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 19: KILL_IF -TEMP[0].xxxx > 20: MOV OUT[0], IMM[1].xzzx > 21: END >radeonsi: Compiling shader 154 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 
x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %28 = fadd float %26, 1.000000e+00 > %29 = call float @llvm.fma.f32(float %28, float %25, float %27) > %30 = fadd float %29, -1.000000e+00 > %31 = fcmp une float %26, 0.000000e+00 > %32 = fdiv float 1.000000e+00, %26 > %temp4.0 = select i1 %31, float %32, float 0x4600000000000000 > %33 = fmul float %temp4.0, %30 > %34 = call float @llvm.AMDGPU.clamp.(float %33, float 0.000000e+00, float 1.000000e+00) > %35 = call float @llvm.fma.f32(float %34, float -2.000000e+00, float 3.000000e+00) > %36 = fmul float %34, %34 > %37 = call float @llvm.fma.f32(float %35, float %36, float 0xBFB99999A0000000) > %38 = fcmp olt float %37, 0.000000e+00 > %39 = select i1 %38, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %39) > %40 = bitcast float %5 to i32 > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %40, 10 > %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float 1.000000e+00, 11 > %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float 0.000000e+00, 12 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float 0.000000e+00, 13 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float 1.000000e+00, 14 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >declare void @llvm.AMDGPU.kill(float) > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] 
>DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..2] >DCL CONST[2][0..4095] >DCL CONST[3][0..39] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] FLT32 {158456325028528675187087900672.0000, 0.0000, 0.0000, 0.0000} >IMM[4] UINT32 {2, 624, 0, 0} > 0: MUL TEMP[0].xyz, IN[3].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[2].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[2].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, 
TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[2].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[2].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[2].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[2].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[2].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL 
ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[2].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[2].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[2].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[2].zzzz >205: MUL TEMP[4].x, IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >211: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[2].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[17].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[17].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] 
>228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[2].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[2].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[2].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[2].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[2].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[2].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[2].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, IN[2].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[2].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, 
TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[2].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[2].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][0], TEMP[3] >356: DP4 TEMP[1].x, CONST[1][1], TEMP[3] >357: MOV TEMP[0].y, TEMP[1].xxxx >358: DP4 TEMP[1].x, CONST[1][2], TEMP[3] >359: MOV TEMP[0].z, TEMP[1].xxxx >360: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz >361: SQRT TEMP[1].x, TEMP[1].xxxx >362: FSEQ TEMP[2].xyz, TEMP[1].xxxx, IMM[0].wwww >363: SSG TEMP[3].xyz, TEMP[0].xyzz >364: MUL TEMP[3].xyz, IMM[3].xxxx, TEMP[3].xyzz >365: RCP TEMP[4].xyz, TEMP[1].xxxx >366: MUL TEMP[4].xyz, TEMP[0].xyzz, TEMP[4].xyzz >367: UCMP TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[4].xyzz >368: FSNE TEMP[3].x, CONST[3][39].xxxx, IMM[0].wwww >369: UIF TEMP[3].xxxx :0 >370: RCP TEMP[3].x, CONST[3][39].xxxx >371: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >372: ELSE :0 >373: SSG TEMP[1].x, TEMP[1].xxxx >374: MUL TEMP[3].x, IMM[3].xxxx, TEMP[1].xxxx >375: ENDIF >376: ADD TEMP[1].x, -TEMP[3].xxxx, IMM[0].zzzz >377: MOV TEMP[0].w, TEMP[1].xxxx >378: FMA TEMP[3].x, TEMP[2].zzzz, CONST[3][39].zzzz, IMM[0].zzzz >379: FSEQ TEMP[4].xy, TEMP[3].xxxx, IMM[0].wwww >380: SSG TEMP[5].xy, TEMP[2].xyyy >381: MUL TEMP[5].xy, IMM[3].xxxx, TEMP[5].xyyy >382: RCP TEMP[3].xy, TEMP[3].xxxx >383: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xyyy >384: UCMP TEMP[3].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >385: MUL TEMP[0].x, TEMP[2].zzzz, CONST[3][39].zzzz >386: MOV TEMP[3].z, TEMP[1].xxxx >387: MOV TEMP[0].zw, TEMP[0].wwxw >388: MOV TEMP[3].w, IMM[0].zzzz >389: MOV TEMP[0].xy, IN[1].xyxx >390: MOV OUT[1], TEMP[0] >391: MOV OUT[0], TEMP[3] >392: END >radeonsi: Compiling shader 155 >TGSI shader LLVM IR: > >; 
ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) > %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call float @llvm.SI.load.const(<16 x i8> %34, i32 624) > %36 = call float @llvm.SI.load.const(<16 x i8> %34, i32 632) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %15) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %16) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = fmul float %59, 0x406FE01000000000 > %61 = fmul float %58, 0x406FE01000000000 > %62 = fmul float %57, 0x406FE01000000000 > %63 = fptosi float %60 to i32 > %64 = fptosi float %61 to i32 > %65 = fptosi float %62 to i32 > %66 = shl i32 %63, 1 > %67 = or i32 %66, 1 > %68 = shl i32 %64, 1 > %69 = or 
i32 %68, 1 > %70 = shl i32 %65, 1 > %71 = or i32 %70, 1 > %72 = shl i32 %63, 5 > %73 = or i32 %72, 4 > %74 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %73) > %75 = fmul float %51, %74 > %76 = shl i32 %64, 5 > %77 = or i32 %76, 4 > %78 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %77) > %79 = fmul float %52, %78 > %80 = shl i32 %67, 4 > %81 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %80) > %82 = shl i32 %67, 4 > %83 = or i32 %82, 12 > %84 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %83) > %85 = fmul float %81, %84 > %86 = shl i32 %67, 4 > %87 = or i32 %86, 4 > %88 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %87) > %89 = shl i32 %67, 4 > %90 = or i32 %89, 8 > %91 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %90) > %92 = fsub float -0.000000e+00, %85 > %93 = call float @llvm.fma.f32(float %88, float %91, float %92) > %94 = shl i32 %67, 4 > %95 = or i32 %94, 4 > %96 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %95) > %97 = shl i32 %67, 4 > %98 = or i32 %97, 8 > %99 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %98) > %100 = call float @llvm.fma.f32(float %96, float %99, float %85) > %101 = fmul float %100, %51 > %102 = fmul float %93, %51 > %103 = fmul float %102, 2.000000e+00 > %104 = shl i32 %69, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %104) > %106 = shl i32 %69, 4 > %107 = or i32 %106, 12 > %108 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %107) > %109 = fmul float %105, %108 > %110 = shl i32 %69, 4 > %111 = or i32 %110, 4 > %112 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %111) > %113 = shl i32 %69, 4 > %114 = or i32 %113, 8 > %115 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %114) > %116 = fsub float -0.000000e+00, %109 > %117 = call float @llvm.fma.f32(float %112, float %115, float %116) > %118 = shl i32 %69, 4 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %119) > %121 = shl i32 %69, 4 > %122 = or i32 %121, 8 > %123 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %122) > %124 = call float @llvm.fma.f32(float %120, float %123, float %109) > %125 = fmul float %124, %52 > %126 = fmul float %125, 2.000000e+00 > %127 = fmul float %117, %52 > %128 = fmul float %127, 2.000000e+00 > %129 = shl i32 %67, 4 > %130 = or i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %130) > %132 = shl i32 %67, 4 > %133 = or i32 %132, 8 > %134 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %133) > %135 = shl i32 %67, 4 > %136 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %135) > %137 = shl i32 %67, 4 > %138 = or i32 %137, 12 > %139 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %138) > %140 = fmul float %134, %139 > %141 = fmul float %134, %136 > %142 = fmul float %131, %139 > %143 = shl i32 %67, 4 > %144 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %143) > %145 = shl i32 %67, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %146) > %148 = call float @llvm.fma.f32(float %144, float %147, float %140) > %149 = fmul float %148, %51 > %150 = fmul float %149, 2.000000e+00 > %151 = shl i32 %67, 4 > %152 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %151) > %153 = shl i32 %67, 4 > %154 = or i32 %153, 4 > %155 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %154) > %156 = shl i32 %67, 4 > %157 = or i32 %156, 8 > %158 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %157) > %159 = shl i32 %67, 4 > %160 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %159) > %161 = shl i32 %67, 4 > %162 
= or i32 %161, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %162) > %164 = shl i32 %67, 4 > %165 = or i32 %164, 8 > %166 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %165) > %167 = fmul float %152, %160 > %168 = fmul float %155, %163 > %169 = fmul float %158, %166 > %170 = fadd float %169, %168 > %171 = fadd float %169, %167 > %172 = fadd float %168, %167 > %173 = fsub float -0.000000e+00, %170 > %174 = call float @llvm.fma.f32(float %173, float 2.000000e+00, float 1.000000e+00) > %175 = fsub float -0.000000e+00, %171 > %176 = call float @llvm.fma.f32(float %175, float 2.000000e+00, float 1.000000e+00) > %177 = fsub float -0.000000e+00, %172 > %178 = call float @llvm.fma.f32(float %177, float 2.000000e+00, float 1.000000e+00) > %179 = fmul float %51, %176 > %180 = shl i32 %69, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %181) > %183 = shl i32 %69, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %184) > %186 = shl i32 %69, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %186) > %188 = shl i32 %69, 4 > %189 = or i32 %188, 12 > %190 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %189) > %191 = fmul float %185, %190 > %192 = fmul float %185, %187 > %193 = fmul float %182, %190 > %194 = shl i32 %69, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %194) > %196 = shl i32 %69, 4 > %197 = or i32 %196, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %197) > %199 = call float @llvm.fma.f32(float %195, float %198, float %191) > %200 = fmul float %199, %52 > %201 = fmul float %200, 2.000000e+00 > %202 = shl i32 %69, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %202) > %204 = shl i32 %69, 4 > %205 = or i32 %204, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %205) > %207 = shl i32 %69, 4 > %208 = or i32 %207, 8 > %209 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %208) > %210 = shl i32 %69, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %210) > %212 = shl i32 %69, 4 > %213 = or i32 %212, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %213) > %215 = shl i32 %69, 4 > %216 = or i32 %215, 8 > %217 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %216) > %218 = fmul float %203, %211 > %219 = fmul float %206, %214 > %220 = fmul float %209, %217 > %221 = fadd float %220, %219 > %222 = fadd float %220, %218 > %223 = fadd float %219, %218 > %224 = fsub float -0.000000e+00, %221 > %225 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fsub float -0.000000e+00, %222 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %223 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fmul float %52, %227 > %231 = fadd float %150, %201 > %232 = fadd float %179, %230 > %233 = fadd float %103, %128 > %234 = fadd float %75, %79 > %235 = shl i32 %65, 5 > %236 = or i32 %235, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %236) > %238 = fmul float %53, %237 > %239 = shl i32 %71, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %239) > %241 = shl i32 %71, 4 > %242 = or i32 %241, 12 > %243 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %242) > %244 = fmul float %240, %243 > %245 = shl i32 %71, 4 > %246 = or i32 %245, 4 > %247 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %246) > %248 = shl i32 %71, 4 > %249 = or i32 %248, 8 > %250 = call 
float @llvm.SI.load.const(<16 x i8> %32, i32 %249) > %251 = fsub float -0.000000e+00, %244 > %252 = call float @llvm.fma.f32(float %247, float %250, float %251) > %253 = shl i32 %71, 4 > %254 = or i32 %253, 4 > %255 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %254) > %256 = shl i32 %71, 4 > %257 = or i32 %256, 8 > %258 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %257) > %259 = call float @llvm.fma.f32(float %255, float %258, float %244) > %260 = fmul float %259, %53 > %261 = fmul float %260, 2.000000e+00 > %262 = fmul float %252, %53 > %263 = fmul float %262, 2.000000e+00 > %264 = shl i32 %71, 4 > %265 = or i32 %264, 4 > %266 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %265) > %267 = shl i32 %71, 4 > %268 = or i32 %267, 8 > %269 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %268) > %270 = shl i32 %71, 4 > %271 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %270) > %272 = shl i32 %71, 4 > %273 = or i32 %272, 12 > %274 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %273) > %275 = fmul float %269, %274 > %276 = fmul float %269, %271 > %277 = fmul float %266, %274 > %278 = shl i32 %71, 4 > %279 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %278) > %280 = shl i32 %71, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %281) > %283 = call float @llvm.fma.f32(float %279, float %282, float %275) > %284 = fmul float %283, %53 > %285 = fmul float %284, 2.000000e+00 > %286 = shl i32 %71, 4 > %287 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %286) > %288 = shl i32 %71, 4 > %289 = or i32 %288, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %289) > %291 = shl i32 %71, 4 > %292 = or i32 %291, 8 > %293 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %292) > %294 = shl i32 %71, 4 > %295 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %294) > %296 = shl i32 %71, 4 > %297 = or i32 %296, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %297) > %299 = shl i32 %71, 4 > %300 = or i32 %299, 8 > %301 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %300) > %302 = fmul float %287, %295 > %303 = fmul float %290, %298 > %304 = fmul float %293, %301 > %305 = fadd float %304, %303 > %306 = fadd float %304, %302 > %307 = fadd float %303, %302 > %308 = fsub float -0.000000e+00, %305 > %309 = call float @llvm.fma.f32(float %308, float 2.000000e+00, float 1.000000e+00) > %310 = fsub float -0.000000e+00, %306 > %311 = call float @llvm.fma.f32(float %310, float 2.000000e+00, float 1.000000e+00) > %312 = fsub float -0.000000e+00, %307 > %313 = call float @llvm.fma.f32(float %312, float 2.000000e+00, float 1.000000e+00) > %314 = fmul float %53, %311 > %315 = fadd float %231, %285 > %316 = fadd float %232, %314 > %317 = fadd float %233, %263 > %318 = fadd float %234, %238 > %319 = fmul float %315, %40 > %320 = fmul float %316, %41 > %321 = fadd float %319, %320 > %322 = fmul float %317, %42 > %323 = fadd float %321, %322 > %324 = fadd float %323, %318 > %325 = shl i32 %67, 4 > %326 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %325) > %327 = shl i32 %67, 4 > %328 = or i32 %327, 8 > %329 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %328) > %330 = fsub float -0.000000e+00, %142 > %331 = call float @llvm.fma.f32(float %326, float %329, float %330) > %332 = fmul float %331, %51 > %333 = fmul float %332, 2.000000e+00 > %334 = fmul float %101, 2.000000e+00 > %335 = shl i32 %69, 4 > %336 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %335) > %337 = shl i32 %69, 4 > %338 = or i32 %337, 8 > %339 = 
call float @llvm.SI.load.const(<16 x i8> %32, i32 %338) > %340 = fsub float -0.000000e+00, %193 > %341 = call float @llvm.fma.f32(float %336, float %339, float %340) > %342 = fmul float %341, %52 > %343 = fmul float %342, 2.000000e+00 > %344 = fmul float %51, %178 > %345 = fmul float %51, %174 > %346 = fmul float %52, %229 > %347 = fmul float %52, %225 > %348 = shl i32 %63, 5 > %349 = or i32 %348, 8 > %350 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %349) > %351 = fmul float %51, %350 > %352 = shl i32 %64, 5 > %353 = or i32 %352, 8 > %354 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %353) > %355 = fmul float %52, %354 > %356 = fadd float %343, %333 > %357 = fadd float %126, %334 > %358 = fadd float %346, %344 > %359 = fadd float %355, %351 > %360 = shl i32 %71, 4 > %361 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %360) > %362 = shl i32 %71, 4 > %363 = or i32 %362, 8 > %364 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %363) > %365 = fsub float -0.000000e+00, %277 > %366 = call float @llvm.fma.f32(float %361, float %364, float %365) > %367 = fmul float %366, %53 > %368 = fmul float %367, 2.000000e+00 > %369 = fmul float %53, %313 > %370 = fmul float %53, %309 > %371 = shl i32 %65, 5 > %372 = or i32 %371, 8 > %373 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %372) > %374 = fmul float %53, %373 > %375 = fadd float %356, %368 > %376 = fadd float %357, %261 > %377 = fadd float %358, %369 > %378 = fadd float %359, %374 > %379 = fmul float %375, %40 > %380 = fmul float %376, %41 > %381 = fadd float %379, %380 > %382 = fmul float %377, %42 > %383 = fadd float %381, %382 > %384 = fadd float %383, %378 > %385 = shl i32 %63, 5 > %386 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %385) > %387 = fmul float %51, %386 > %388 = shl i32 %64, 5 > %389 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %388) > %390 = fmul float %52, %389 > %391 = shl i32 %65, 5 > %392 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %391) > %393 = fmul float %53, %392 > %394 = shl i32 %67, 4 > %395 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %394) > %396 = shl i32 %67, 4 > %397 = or i32 %396, 4 > %398 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %397) > %399 = fsub float -0.000000e+00, %140 > %400 = call float @llvm.fma.f32(float %395, float %398, float %399) > %401 = fadd float %142, %141 > %402 = fmul float %400, %51 > %403 = fmul float %401, %51 > %404 = fmul float %402, 2.000000e+00 > %405 = fmul float %403, 2.000000e+00 > %406 = shl i32 %69, 4 > %407 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %406) > %408 = shl i32 %69, 4 > %409 = or i32 %408, 4 > %410 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %409) > %411 = fsub float -0.000000e+00, %191 > %412 = call float @llvm.fma.f32(float %407, float %410, float %411) > %413 = shl i32 %71, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %413) > %415 = shl i32 %71, 4 > %416 = or i32 %415, 4 > %417 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %416) > %418 = fsub float -0.000000e+00, %275 > %419 = call float @llvm.fma.f32(float %414, float %417, float %418) > %420 = fadd float %277, %276 > %421 = fmul float %412, %52 > %422 = fmul float %419, %53 > %423 = fmul float %420, %53 > %424 = fmul float %422, 2.000000e+00 > %425 = fmul float %423, 2.000000e+00 > %426 = fadd float %193, %192 > %427 = fmul float %426, %52 > %428 = fmul float %421, 2.000000e+00 > %429 = fmul float %427, 2.000000e+00 > %430 = fadd float %345, %347 > %431 = fadd float %404, %428 > %432 = fadd float %405, %429 > 
%433 = fadd float %387, %390 > %434 = fadd float %370, %430 > %435 = fadd float %424, %431 > %436 = fadd float %425, %432 > %437 = fadd float %393, %433 > %438 = fmul float %434, %40 > %439 = fmul float %435, %41 > %440 = fadd float %438, %439 > %441 = fmul float %436, %42 > %442 = fadd float %440, %441 > %443 = fadd float %442, %437 > %444 = fmul float %19, %443 > %445 = fmul float %20, %324 > %446 = fadd float %444, %445 > %447 = fmul float %21, %384 > %448 = fadd float %446, %447 > %449 = fadd float %448, %22 > %450 = fmul float %23, %443 > %451 = fmul float %24, %324 > %452 = fadd float %450, %451 > %453 = fmul float %25, %384 > %454 = fadd float %452, %453 > %455 = fadd float %454, %26 > %456 = fmul float %27, %443 > %457 = fmul float %28, %324 > %458 = fadd float %456, %457 > %459 = fmul float %29, %384 > %460 = fadd float %458, %459 > %461 = fadd float %460, %30 > %462 = fmul float %449, %449 > %463 = fmul float %455, %455 > %464 = fadd float %463, %462 > %465 = fmul float %461, %461 > %466 = fadd float %464, %465 > %467 = call float @llvm.sqrt.f32(float %466) > %468 = fcmp oeq float %467, 0.000000e+00 > %469 = fcmp oeq float %467, 0.000000e+00 > %470 = fcmp oeq float %467, 0.000000e+00 > %471 = fcmp ogt float %449, 0.000000e+00 > %472 = select i1 %471, float 1.000000e+00, float %449 > %473 = fcmp oge float %472, 0.000000e+00 > %474 = fcmp ogt float %455, 0.000000e+00 > %475 = select i1 %474, float 1.000000e+00, float %455 > %476 = fcmp oge float %475, 0.000000e+00 > %477 = fcmp ogt float %461, 0.000000e+00 > %478 = select i1 %477, float 1.000000e+00, float %461 > %479 = fcmp oge float %478, 0.000000e+00 > %.op = fmul float %472, 0x4600000000000000 > %480 = select i1 %473, float %.op, float 0xC600000000000000 > %.op154 = fmul float %475, 0x4600000000000000 > %481 = select i1 %476, float %.op154, float 0xC600000000000000 > %.op155 = fmul float %478, 0x4600000000000000 > %482 = select i1 %479, float %.op155, float 0xC600000000000000 > %483 = fdiv float 1.000000e+00, %467 > %484 = fmul float %449, %483 > %485 = fmul float %455, %483 > %486 = fmul float %461, %483 > %487 = select i1 %468, float %480, float %484 > %488 = select i1 %469, float %481, float %485 > %489 = select i1 %470, float %482, float %486 > %490 = fcmp une float %35, 0.000000e+00 > br i1 %490, label %IF, label %ELSE > >IF: ; preds = %main_body > %491 = fdiv float 1.000000e+00, %35 > %492 = fmul float %467, %491 > br label %ENDIF > >ELSE: ; preds = %main_body > %493 = fcmp ogt float %467, 0.000000e+00 > %494 = select i1 %493, float 1.000000e+00, float %467 > %495 = fcmp oge float %494, 0.000000e+00 > %.op156 = fmul float %494, 0x4600000000000000 > %496 = select i1 %495, float %.op156, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %492, %IF ], [ %496, %ELSE ] > %497 = fsub float 1.000000e+00, %temp12.0 > %498 = call float @llvm.fma.f32(float %489, float %36, float 1.000000e+00) > %499 = fcmp oeq float %498, 0.000000e+00 > %500 = fcmp oeq float %498, 0.000000e+00 > %501 = fcmp ogt float %487, 0.000000e+00 > %502 = select i1 %501, float 1.000000e+00, float %487 > %503 = fcmp oge float %502, 0.000000e+00 > %504 = fcmp ogt float %488, 0.000000e+00 > %505 = select i1 %504, float 1.000000e+00, float %488 > %506 = fcmp oge float %505, 0.000000e+00 > %.op157 = fmul float %502, 0x4600000000000000 > %507 = select i1 %503, float %.op157, float 0xC600000000000000 > %.op158 = fmul float %505, 0x4600000000000000 > %508 = select i1 %506, float %.op158, float 0xC600000000000000 > %509 
= fdiv float 1.000000e+00, %498 > %510 = fmul float %487, %509 > %511 = fmul float %488, %509 > %512 = select i1 %499, float %507, float %510 > %513 = select i1 %500, float %508, float %511 > %514 = fmul float %489, %36 > %515 = bitcast i32 %11 to float > %516 = insertvalue <{ float, float, float }> undef, float %515, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %514, float %497) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %512, float %513, float %497, float 1.000000e+00) > ret <{ float, float, float }> %516 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL CONST[1][0..39] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 0.0500, 0.0000, 1.0000, 158456325028528675187087900672.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 624, 0, 0} > 0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].xxxx > 1: FSLT TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy > 2: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 3: INEG TEMP[1].x, TEMP[1].xxxx > 4: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 5: AND TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz > 6: KILL_IF -TEMP[2].xxxx > 7: FSNE TEMP[2].x, CONST[1][39].xxxx, IMM[0].yyyy > 8: UIF TEMP[2].xxxx :0 > 9: RCP TEMP[2].x, CONST[1][39].xxxx > 10: MUL TEMP[2].x, CONST[1][39].yyyy, TEMP[2].xxxx > 11: ELSE :0 > 12: SSG TEMP[3].x, CONST[1][39].yyyy > 13: MUL TEMP[2].x, IMM[0].wwww, TEMP[3].xxxx > 14: ENDIF > 15: ADD TEMP[0].x, -TEMP[2].xxxx, IN[0].wwww > 16: FSLT TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy > 17: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 18: INEG TEMP[0].x, TEMP[0].xxxx > 19: USNE TEMP[1].x, TEMP[0].xxxx, IMM[2].xxxx > 20: AND TEMP[0].x, TEMP[1].xxxx, IMM[0].zzzz > 21: KILL_IF -TEMP[0].xxxx > 22: ADD TEMP[0], -IN[0].wwww, IMM[0].zzzz > 23: MOV OUT[0], TEMP[0] > 24: END >radeonsi: Compiling shader 156 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 624) > %26 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 628) > %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %28 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %29 = fadd float %27, 0x3FA99999A0000000 > %30 = fcmp olt float %29, 0.000000e+00 > %31 = select i1 %30, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %31) > %32 = fcmp une float %25, 0.000000e+00 > br i1 %32, label %IF, label %ELSE > >IF: ; preds = %main_body > %33 = fdiv float 1.000000e+00, %25 > %34 = fmul float %26, %33 > br label %ENDIF > >ELSE: ; preds = %main_body > %35 = fcmp ogt float %26, 0.000000e+00 > %36 = select i1 %35, float 1.000000e+00, float %26 > %37 = fcmp oge float %36, 0.000000e+00 > %.op = fmul float %36, 0x4600000000000000 > %38 = select i1 %37, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp8.0 = phi float [ %34, %IF ], [ %38, %ELSE ] > %39 = fsub float %28, %temp8.0 > %40 = fcmp olt float %39, 0.000000e+00 > %41 = select i1 %40, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %41) > %42 = fsub float 1.000000e+00, %28 > %43 = fsub float 1.000000e+00, %28 > %44 = fsub float 1.000000e+00, %28 > %45 = fsub float 1.000000e+00, %28 > %46 = bitcast float %5 to i32 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %46, 10 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 11 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %43, 12 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %44, 13 > %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %45, 14 > %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >declare void @llvm.AMDGPU.kill(float) > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..35] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 272, 288, 304} >IMM[2] UINT32 {320, 352, 448, 560} >IMM[3] UINT32 {544, 512, 528, 336} >IMM[4] 
UINT32 {368, 480, 400, 384} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {464, 416, 432, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][17], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][22], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][28].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][35].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][35].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][34].xyzz > 18: MUL TEMP[8].xyz, CONST[1][32].xyzz, CONST[1][33].xyzz > 19: MOV TEMP[8].w, CONST[1][32].wwww > 20: DP3 TEMP[1].x, CONST[1][21].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][23].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][22].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][30].xyzz > 33: FMA TEMP[10].x, -CONST[1][25].yyyy, TEMP[9].xxxx, CONST[1][25].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][25].zzzz, TEMP[10].xxxx, -CONST[1][24].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][25].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][29].wwww > 50: FSNE TEMP[11].x, CONST[1][24].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][24].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][25].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][26].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][24].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][26].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][27].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][27].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD 
TEMP[0].x, -TEMP[0].xxxx, CONST[1][28].zzzz > 76: FSNE TEMP[12].x, CONST[1][24].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][24].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][27].xyzz > 87: FMA TEMP[7].xyz, CONST[1][27].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][29].xxxx >106: LG2 TEMP[7].y, CONST[1][29].yyyy >107: LG2 TEMP[7].z, CONST[1][29].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 157 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %20 = call float @llvm.SI.load.const(<16 x i8> 
%17, i32 280) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %77 = call 
float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, 
float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi 
float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float 
%299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> 
@llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 496, 256} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xy, IN[0].xyyy > 26: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 27: MOV TEMP[2].xyz, TEMP[1].xyzx > 28: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 29: MOV TEMP[2].w, TEMP[1].xxxx > 30: MUL TEMP[0], TEMP[2], IN[4] > 31: MUL TEMP[0], TEMP[0], IN[2] > 32: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 33: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 34: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][31].xyzz > 35: MOV TEMP[0].xyz, TEMP[0].xyzx > 36: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][16].xxxx > 37: MOV TEMP[0].w, TEMP[1].xxxx > 38: MOV OUT[0], TEMP[0] > 39: END >radeonsi: Compiling shader 158 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, 
[16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %31 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 > %33 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %34 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %33, i64 0, i64 3 > %35 = load <4 x i32>, <4 x i32> addrspace(2)* %34, align 16, !tbaa !0 > %36 = extractelement <8 x i32> %32, i32 7 > %37 = extractelement <4 x i32> %35, i32 0 > %38 = and i32 %37, %36 > %39 = insertelement <4 x i32> %35, i32 %38, i32 0 > %40 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 > %42 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %42, i64 0, i64 7 > %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 > %45 = extractelement <8 x i32> %41, i32 7 > %46 = extractelement <4 x i32> %44, i32 0 > %47 = and i32 %46, %45 > %48 = insertelement <4 x i32> %44, i32 %47, i32 0 > %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %63 = fcmp oeq float %53, 0.000000e+00 > %64 = fcmp oeq float %53, 0.000000e+00 > %65 = fcmp ogt float %51, 0.000000e+00 > %66 = select i1 %65, float 1.000000e+00, float %51 > %67 = fcmp oge float %66, 0.000000e+00 > %68 = fcmp ogt float %52, 0.000000e+00 > %69 = select i1 %68, float 1.000000e+00, float %52 > %70 = fcmp oge float %69, 0.000000e+00 > %.op = fmul float %66, 0x4600000000000000 > %71 = select i1 %67, float %.op, float 0xC600000000000000 > %.op12 = fmul float %69, 0x4600000000000000 > %72 = select i1 
%70, float %.op12, float 0xC600000000000000 > %73 = fdiv float 1.000000e+00, %53 > %74 = fmul float %51, %73 > %75 = fmul float %52, %73 > %76 = select i1 %63, float %71, float %74 > %77 = select i1 %64, float %72, float %75 > %78 = bitcast float %76 to i32 > %79 = bitcast float %77 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = fsub float %53, %83 > %85 = fcmp une float %25, 0.000000e+00 > %86 = call float @llvm.fabs.f32(float %84) > br i1 %85, label %IF, label %ELSE > >IF: ; preds = %main_body > %87 = fdiv float 1.000000e+00, %25 > %88 = fmul float %86, %87 > br label %ENDIF > >ELSE: ; preds = %main_body > %89 = fcmp one float %84, 0.000000e+00 > %90 = select i1 %89, float 1.000000e+00, float %86 > %91 = fcmp oge float %90, 0.000000e+00 > %.op13 = fmul float %90, 0x4600000000000000 > %92 = select i1 %91, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %88, %IF ], [ %92, %ELSE ] > %93 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %94 = fsub float 1.000000e+00, %93 > %95 = call float @llvm.log2.f32(float %94) > %96 = fmul float %95, %26 > %97 = call float @llvm.exp2.f32(float %96) > %98 = fsub float 1.000000e+00, %97 > %99 = bitcast float %49 to i32 > %100 = bitcast float %50 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %41, <4 x i32> %48, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = extractelement <4 x float> %103, i32 3 > %108 = fmul float %98, %107 > %109 = fmul float %104, %59 > %110 = fmul float %105, %60 > %111 = fmul float %106, %61 > %112 = fmul float %108, %62 > %113 = fmul float %109, %54 > %114 = fmul float %110, %55 > %115 = fmul float %111, %56 > %116 = fmul float %112, %57 > %117 = fmul float %113, %58 > %118 = fmul float %114, %58 > %119 = fmul float %115, %58 > %120 = fmul float %116, %117 > %121 = fmul float %116, %118 > %122 = fmul float %116, %119 > %123 = fmul float %120, %28 > %124 = fmul float %121, %29 > %125 = fadd float %124, %123 > %126 = fmul float %122, %30 > %127 = fadd float %125, %126 > %128 = fmul float %127, %27 > %129 = bitcast float %5 to i32 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %129, 10 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %120, 11 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %121, 12 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %122, 13 > %134 = insertvalue <{ i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %128, 14 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 496, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xy, IN[0].xyyy > 26: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 27: MOV TEMP[2].xyz, TEMP[1].xyzx > 28: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 29: MOV TEMP[2].w, TEMP[1].xxxx > 30: MUL TEMP[0], TEMP[2], IN[4] > 31: MUL TEMP[0], TEMP[0], IN[2] > 32: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 33: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 34: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][31].xyzz > 35: MOV TEMP[1].w, TEMP[1].xxxx > 36: MOV TEMP[1].xyz, 
TEMP[0].xyzx > 37: MOV OUT[0], TEMP[1] > 38: END >radeonsi: Compiling shader 159 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %30 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 > %32 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %33 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %32, i64 0, i64 3 > %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 > %35 = extractelement <8 x i32> %31, i32 7 > %36 = extractelement <4 x i32> %34, i32 0 > %37 = and i32 %36, %35 > %38 = insertelement <4 x i32> %34, i32 %37, i32 0 > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 7 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %62 = fcmp oeq float %52, 0.000000e+00 > %63 = fcmp oeq float %52, 0.000000e+00 > %64 = fcmp ogt float %50, 0.000000e+00 > %65 = select i1 %64, float 1.000000e+00, float %50 > 
%66 = fcmp oge float %65, 0.000000e+00 > %67 = fcmp ogt float %51, 0.000000e+00 > %68 = select i1 %67, float 1.000000e+00, float %51 > %69 = fcmp oge float %68, 0.000000e+00 > %.op = fmul float %65, 0x4600000000000000 > %70 = select i1 %66, float %.op, float 0xC600000000000000 > %.op12 = fmul float %68, 0x4600000000000000 > %71 = select i1 %69, float %.op12, float 0xC600000000000000 > %72 = fdiv float 1.000000e+00, %52 > %73 = fmul float %50, %72 > %74 = fmul float %51, %72 > %75 = select i1 %62, float %70, float %73 > %76 = select i1 %63, float %71, float %74 > %77 = bitcast float %75 to i32 > %78 = bitcast float %76 to i32 > %79 = insertelement <2 x i32> undef, i32 %77, i32 0 > %80 = insertelement <2 x i32> %79, i32 %78, i32 1 > %81 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %80, <8 x i32> %31, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %82 = extractelement <4 x float> %81, i32 0 > %83 = fsub float %52, %82 > %84 = fcmp une float %25, 0.000000e+00 > %85 = call float @llvm.fabs.f32(float %83) > br i1 %84, label %IF, label %ELSE > >IF: ; preds = %main_body > %86 = fdiv float 1.000000e+00, %25 > %87 = fmul float %85, %86 > br label %ENDIF > >ELSE: ; preds = %main_body > %88 = fcmp one float %83, 0.000000e+00 > %89 = select i1 %88, float 1.000000e+00, float %85 > %90 = fcmp oge float %89, 0.000000e+00 > %.op13 = fmul float %89, 0x4600000000000000 > %91 = select i1 %90, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %87, %IF ], [ %91, %ELSE ] > %92 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %93 = fsub float 1.000000e+00, %92 > %94 = call float @llvm.log2.f32(float %93) > %95 = fmul float %94, %26 > %96 = call float @llvm.exp2.f32(float %95) > %97 = fsub float 1.000000e+00, %96 > %98 = bitcast float %48 to i32 > %99 = bitcast float %49 to i32 > %100 = insertelement <2 x i32> undef, i32 %98, i32 0 > %101 = insertelement <2 x i32> %100, i32 %99, i32 1 > %102 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %101, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %103 = extractelement <4 x float> %102, i32 0 > %104 = extractelement <4 x float> %102, i32 1 > %105 = extractelement <4 x float> %102, i32 2 > %106 = extractelement <4 x float> %102, i32 3 > %107 = fmul float %97, %106 > %108 = fmul float %103, %58 > %109 = fmul float %104, %59 > %110 = fmul float %105, %60 > %111 = fmul float %107, %61 > %112 = fmul float %108, %53 > %113 = fmul float %109, %54 > %114 = fmul float %110, %55 > %115 = fmul float %111, %56 > %116 = fmul float %112, %57 > %117 = fmul float %113, %57 > %118 = fmul float %114, %57 > %119 = fmul float %115, %116 > %120 = fmul float %115, %117 > %121 = fmul float %115, %118 > %122 = fmul float %119, %27 > %123 = fmul float %120, %28 > %124 = fadd float %123, %122 > %125 = fmul float %121, %29 > %126 = fadd float %124, %125 > %127 = bitcast float %5 to i32 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %127, 10 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %119, 11 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, 
float, float, float, float, float, float, float }> %129, float %120, 12 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %121, 13 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %126, 14 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..29] >DCL TEMP[0..9], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.5000, 0.0087} >IMM[1] UINT32 {0, 272, 288, 304} >IMM[2] UINT32 {320, 432, 352, 464} >IMM[3] UINT32 {336, 368, 416, 176} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][17], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[3], TEMP[1] > 10: MOV TEMP[4].zw, TEMP[1].wwzw > 11: MUL TEMP[0].xy, TEMP[2].xxxx, CONST[1][27].xyyy > 12: MUL TEMP[2].xy, CONST[1][27].xyyy, IMM[0].xyyy > 13: FMA TEMP[4].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[0].xyyy > 14: MUL TEMP[0].xyz, CONST[1][22].xyzz, CONST[1][29].yyyy > 15: FMA TEMP[0].xyz, CONST[1][21].xyzz, CONST[1][29].xxxx, TEMP[0].xyzz > 16: FMA TEMP[2].xyz, CONST[1][23].xyzz, CONST[1][29].zzzz, TEMP[0].xyzz > 17: MUL TEMP[5].x, TEMP[2].yyyy, IN[1].yyyy > 18: FMA TEMP[5].x, TEMP[2].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 19: FMA TEMP[5].x, -TEMP[2].zzzz, IN[1].zzzz, TEMP[5].xxxx > 20: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 21: DP2 TEMP[6].x, TEMP[2].zxxx, IN[1].ywww > 22: FMA TEMP[6].x, -TEMP[2].yyyy, IN[1].zzzz, TEMP[6].xxxx > 23: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 24: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 25: MUL TEMP[7].x, TEMP[2].zzzz, IN[1].xxxx > 26: FMA TEMP[7].x, TEMP[2].yyyy, IN[1].wwww, -TEMP[7].xxxx > 27: FMA 
TEMP[7].x, TEMP[2].xxxx, IN[1].zzzz, TEMP[7].xxxx > 28: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 29: DP2 TEMP[8].x, TEMP[2].yzzz, IN[1].xwww > 30: FMA TEMP[2].x, -TEMP[2].xxxx, IN[1].yyyy, TEMP[8].xxxx > 31: FMA TEMP[8].x, -TEMP[2].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 32: FMA TEMP[9].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 33: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 34: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 35: FMA TEMP[6].x, TEMP[7].xxxx, IN[1].wwww, TEMP[9].xxxx > 36: FMA TEMP[6].x, TEMP[2].xxxx, -IN[1].xxxx, TEMP[6].xxxx > 37: MOV TEMP[8].y, TEMP[6].xxxx > 38: FMA TEMP[2].x, TEMP[2].xxxx, IN[1].wwww, TEMP[5].xxxx > 39: MOV TEMP[8].z, TEMP[2].xxxx > 40: DP3 TEMP[0].x, TEMP[8].xyzz, TEMP[8].xyzz > 41: RSQ TEMP[2].x, TEMP[0].xxxx > 42: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[8].xyzz > 43: ADD TEMP[0].xyz, IN[0].xyzz, -CONST[1][26].xyzz > 44: MUL TEMP[5].x, TEMP[0].yyyy, IN[1].yyyy > 45: FMA TEMP[5].x, TEMP[0].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 46: FMA TEMP[5].x, -TEMP[0].zzzz, IN[1].zzzz, TEMP[5].xxxx > 47: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 48: DP2 TEMP[6].x, TEMP[0].zxxx, IN[1].ywww > 49: FMA TEMP[6].x, -TEMP[0].yyyy, IN[1].zzzz, TEMP[6].xxxx > 50: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 51: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 52: MUL TEMP[7].x, TEMP[0].zzzz, IN[1].xxxx > 53: FMA TEMP[7].x, TEMP[0].yyyy, IN[1].wwww, -TEMP[7].xxxx > 54: FMA TEMP[7].x, TEMP[0].xxxx, IN[1].zzzz, TEMP[7].xxxx > 55: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 56: DP2 TEMP[9].x, TEMP[0].yzzz, IN[1].xwww > 57: FMA TEMP[0].x, -TEMP[0].xxxx, IN[1].yyyy, TEMP[9].xxxx > 58: FMA TEMP[8].x, -TEMP[0].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 59: FMA TEMP[1].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 60: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 61: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 62: FMA TEMP[1].x, TEMP[7].xxxx, IN[1].wwww, TEMP[1].xxxx > 63: FMA TEMP[1].x, TEMP[0].xxxx, -IN[1].xxxx, TEMP[1].xxxx > 64: MOV TEMP[8].y, TEMP[1].xxxx > 65: FMA TEMP[0].x, TEMP[0].xxxx, IN[1].wwww, TEMP[5].xxxx > 66: MOV TEMP[8].z, TEMP[0].xxxx > 67: MOV TEMP[0].xyz, TEMP[8].xyzx > 68: MOV TEMP[1].xyz, IN[2].xyzx > 69: MOV TEMP[5].xyz, IN[3].xyzx > 70: MOV OUT[5], TEMP[5] > 71: MOV OUT[4], TEMP[1] > 72: MOV OUT[3], TEMP[0] > 73: MOV OUT[2], TEMP[2] > 74: MOV OUT[1], TEMP[4] > 75: MOV OUT[0], TEMP[3] > 76: END >radeonsi: Compiling shader 160 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 272) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 276) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 280) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 284) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 288) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 292) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 296) > %26 = call float 
@llvm.SI.load.const(<16 x i8> %18, i32 300) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 316) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 332) > %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) > %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) > %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) > %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 352) > %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 356) > %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 360) > %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 368) > %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) > %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 376) > %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 416) > %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 420) > %46 = call float @llvm.SI.load.const(<16 x i8> %18, i32 424) > %47 = call float @llvm.SI.load.const(<16 x i8> %18, i32 432) > %48 = call float @llvm.SI.load.const(<16 x i8> %18, i32 436) > %49 = call float @llvm.SI.load.const(<16 x i8> %18, i32 464) > %50 = call float @llvm.SI.load.const(<16 x i8> %18, i32 468) > %51 = call float @llvm.SI.load.const(<16 x i8> %18, i32 472) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %13) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %14) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %15) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = fmul float %19, %55 > %78 = fmul float %20, %56 > %79 = fadd float %77, %78 > %80 = fmul float %21, %57 > %81 = fadd float %79, %80 > %82 = fadd float %81, %22 > %83 = fmul float %23, %55 > %84 = fmul float %24, %56 > %85 = fadd float %83, %84 > %86 = fmul float %25, %57 > %87 = fadd float %85, %86 > %88 = fadd float %87, %26 > %89 = fmul float %27, %55 > %90 = fmul 
float %28, %56 > %91 = fadd float %89, %90 > %92 = fmul float %29, %57 > %93 = fadd float %91, %92 > %94 = fadd float %93, %30 > %95 = fmul float %31, %55 > %96 = fmul float %32, %56 > %97 = fadd float %95, %96 > %98 = fmul float %33, %57 > %99 = fadd float %97, %98 > %100 = fadd float %99, %34 > %101 = fmul float %100, %47 > %102 = fmul float %100, %48 > %103 = fsub float -0.000000e+00, %48 > %104 = call float @llvm.fma.f32(float %82, float %47, float %101) > %105 = call float @llvm.fma.f32(float %88, float %103, float %102) > %106 = fmul float %38, %50 > %107 = fmul float %39, %50 > %108 = fmul float %40, %50 > %109 = call float @llvm.fma.f32(float %35, float %49, float %106) > %110 = call float @llvm.fma.f32(float %36, float %49, float %107) > %111 = call float @llvm.fma.f32(float %37, float %49, float %108) > %112 = call float @llvm.fma.f32(float %41, float %51, float %109) > %113 = call float @llvm.fma.f32(float %42, float %51, float %110) > %114 = call float @llvm.fma.f32(float %43, float %51, float %111) > %115 = fmul float %113, %62 > %116 = fsub float -0.000000e+00, %61 > %117 = fsub float -0.000000e+00, %115 > %118 = call float @llvm.fma.f32(float %112, float %116, float %117) > %119 = fsub float -0.000000e+00, %114 > %120 = call float @llvm.fma.f32(float %119, float %63, float %118) > %121 = fmul float %61, %120 > %122 = fsub float -0.000000e+00, %121 > %123 = fmul float %114, %62 > %124 = fmul float %112, %64 > %125 = fadd float %123, %124 > %126 = fsub float -0.000000e+00, %113 > %127 = call float @llvm.fma.f32(float %126, float %63, float %125) > %128 = call float @llvm.fma.f32(float %127, float %64, float %122) > %129 = fmul float %63, %127 > %130 = fmul float %62, %127 > %131 = fsub float -0.000000e+00, %130 > %132 = fmul float %114, %61 > %133 = fsub float -0.000000e+00, %132 > %134 = call float @llvm.fma.f32(float %113, float %64, float %133) > %135 = call float @llvm.fma.f32(float %112, float %63, float %134) > %136 = fsub float -0.000000e+00, %63 > %137 = call float @llvm.fma.f32(float %135, float %136, float %128) > %138 = fmul float %113, %61 > %139 = fmul float %114, %64 > %140 = fadd float %138, %139 > %141 = fsub float -0.000000e+00, %112 > %142 = call float @llvm.fma.f32(float %141, float %62, float %140) > %143 = fsub float -0.000000e+00, %142 > %144 = fsub float -0.000000e+00, %62 > %145 = call float @llvm.fma.f32(float %143, float %144, float %137) > %146 = fsub float -0.000000e+00, %62 > %147 = call float @llvm.fma.f32(float %120, float %146, float %129) > %148 = fsub float -0.000000e+00, %63 > %149 = call float @llvm.fma.f32(float %120, float %148, float %131) > %150 = fsub float -0.000000e+00, %135 > %151 = fsub float -0.000000e+00, %61 > %152 = call float @llvm.fma.f32(float %150, float %151, float %149) > %153 = call float @llvm.fma.f32(float %135, float %64, float %147) > %154 = fsub float -0.000000e+00, %61 > %155 = call float @llvm.fma.f32(float %142, float %154, float %153) > %156 = call float @llvm.fma.f32(float %142, float %64, float %152) > %157 = fmul float %145, %145 > %158 = fmul float %155, %155 > %159 = fadd float %158, %157 > %160 = fmul float %156, %156 > %161 = fadd float %159, %160 > %162 = call float @llvm.AMDGPU.rsq.clamped.f32(float %161) > %163 = fmul float %162, %145 > %164 = fmul float %162, %155 > %165 = fmul float %162, %156 > %166 = fsub float %55, %44 > %167 = fsub float %56, %45 > %168 = fsub float %57, %46 > %169 = fmul float %167, %62 > %170 = fsub float -0.000000e+00, %61 > %171 = fsub float -0.000000e+00, %169 > %172 = call 
float @llvm.fma.f32(float %166, float %170, float %171) > %173 = fsub float -0.000000e+00, %168 > %174 = call float @llvm.fma.f32(float %173, float %63, float %172) > %175 = fmul float %61, %174 > %176 = fsub float -0.000000e+00, %175 > %177 = fmul float %168, %62 > %178 = fmul float %166, %64 > %179 = fadd float %177, %178 > %180 = fsub float -0.000000e+00, %167 > %181 = call float @llvm.fma.f32(float %180, float %63, float %179) > %182 = call float @llvm.fma.f32(float %181, float %64, float %176) > %183 = fmul float %63, %181 > %184 = fmul float %62, %181 > %185 = fsub float -0.000000e+00, %184 > %186 = fmul float %168, %61 > %187 = fsub float -0.000000e+00, %186 > %188 = call float @llvm.fma.f32(float %167, float %64, float %187) > %189 = call float @llvm.fma.f32(float %166, float %63, float %188) > %190 = fsub float -0.000000e+00, %63 > %191 = call float @llvm.fma.f32(float %189, float %190, float %182) > %192 = fmul float %167, %61 > %193 = fmul float %168, %64 > %194 = fadd float %192, %193 > %195 = fsub float -0.000000e+00, %166 > %196 = call float @llvm.fma.f32(float %195, float %62, float %194) > %197 = fsub float -0.000000e+00, %196 > %198 = fsub float -0.000000e+00, %62 > %199 = call float @llvm.fma.f32(float %197, float %198, float %191) > %200 = fsub float -0.000000e+00, %62 > %201 = call float @llvm.fma.f32(float %174, float %200, float %183) > %202 = fsub float -0.000000e+00, %63 > %203 = call float @llvm.fma.f32(float %174, float %202, float %185) > %204 = fsub float -0.000000e+00, %189 > %205 = fsub float -0.000000e+00, %61 > %206 = call float @llvm.fma.f32(float %204, float %205, float %203) > %207 = call float @llvm.fma.f32(float %189, float %64, float %201) > %208 = fsub float -0.000000e+00, %61 > %209 = call float @llvm.fma.f32(float %196, float %208, float %207) > %210 = call float @llvm.fma.f32(float %196, float %64, float %206) > %211 = bitcast i32 %11 to float > %212 = insertvalue <{ float, float, float }> undef, float %211, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %104, float %105, float %94, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %163, float %164, float %165, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %199, float %209, float %210, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %68, float %69, float %70, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %74, float %75, float %76, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %88, float %94, float %100) > ret <{ float, float, float }> %212 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > 
epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 3D, FLOAT >DCL CONST[1][0..28] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, -0.5000} >IMM[1] UINT32 {0, 448, 176, 256} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] FLT32 { -0.0187, 0.0743, -0.2121, 1.5707} >IMM[4] FLT32 { -2.0000, 3.1416, 1.1547, 0.3183} >IMM[5] UINT32 {400, 384, 0, 0} > 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz > 1: SQRT TEMP[1].x, TEMP[0].xxxx > 2: MOV TEMP[0].x, TEMP[1].xxxx > 3: FSEQ TEMP[2].xyz, TEMP[1].xxxx, IMM[0].xxxx > 4: SSG TEMP[3].xyz, IN[2].xyzz > 5: MUL TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz > 6: RCP TEMP[4].xyz, TEMP[1].xxxx > 7: MUL TEMP[4].xyz, IN[2].xyzz, TEMP[4].xyzz > 8: UCMP TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[4].xyzz > 9: DP3 TEMP[3].x, TEMP[2].xyzz, IN[1].xyzz > 10: FSNE TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx > 11: UIF TEMP[4].xxxx :0 > 12: RCP TEMP[4].x, TEMP[3].xxxx > 13: ELSE :0 > 14: MOV TEMP[4].x, IMM[0].yyyy > 15: ENDIF > 16: MOV TEMP[3].x, TEMP[4].xxxx > 17: MUL TEMP[5].x, TEMP[4].xxxx, CONST[1][28].xxxx > 18: FSEQ TEMP[6].xyz, TEMP[2].xyzz, IMM[0].xxxx > 19: SSG TEMP[7].xyz, IN[3].xyzz > 20: MUL TEMP[7].xyz, IMM[0].yyyy, TEMP[7].xyzz > 21: RCP TEMP[8].x, TEMP[2].xxxx > 22: RCP TEMP[8].y, TEMP[2].yyyy > 23: RCP TEMP[8].z, TEMP[2].zzzz > 24: MUL TEMP[8].xyz, IN[3].xyzz, TEMP[8].xyzz > 25: UCMP TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[8].xyzz > 26: FSEQ TEMP[7].xyz, TEMP[2].xyzz, IMM[0].xxxx > 27: SSG TEMP[8].xyz, IN[4].xyzz > 28: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 29: RCP TEMP[9].x, TEMP[2].xxxx > 30: RCP TEMP[9].y, TEMP[2].yyyy > 31: RCP TEMP[9].z, TEMP[2].zzzz > 32: MUL TEMP[9].xyz, IN[4].xyzz, TEMP[9].xyzz > 33: UCMP TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[9].xyzz > 34: MIN TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz > 35: MAX TEMP[8].x, TEMP[6].zzzz, TEMP[6].yyyy > 36: MAX TEMP[8].x, TEMP[8].xxxx, TEMP[6].xxxx > 37: ADD TEMP[8].x, TEMP[1].xxxx, TEMP[8].xxxx > 38: FSEQ TEMP[9].xy, IN[0].wwww, IMM[0].xxxx > 39: SSG TEMP[10].xy, IN[0].xyyy > 40: MUL TEMP[10].xy, IMM[0].yyyy, TEMP[10].xyyy > 41: RCP TEMP[11].xy, IN[0].wwww > 42: MUL TEMP[11].xy, IN[0].xyyy, TEMP[11].xyyy > 43: UCMP TEMP[6].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[11].xyyy > 44: MOV TEMP[9].xy, TEMP[6].xyyy > 45: TEX TEMP[9].x, TEMP[9], SAMP[0], 2D > 46: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[9].xxxx > 47: MIN TEMP[9].x, TEMP[1].xxxx, TEMP[9].xxxx > 48: MAX TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx > 49: ADD TEMP[8].x, -TEMP[5].xxxx, TEMP[9].xxxx > 50: MAX TEMP[8].x, TEMP[8].xxxx, IMM[0].xxxx > 51: ADD TEMP[7].xyz, IN[3].xyzz, -IN[4].xyzz > 52: DP3 TEMP[10].x, TEMP[7].xyzz, TEMP[7].xyzz > 53: SQRT TEMP[10].x, TEMP[10].xxxx > 54: FSNE TEMP[11].x, TEMP[10].xxxx, IMM[0].xxxx > 55: UIF TEMP[11].xxxx :0 > 56: RCP TEMP[11].x, TEMP[10].xxxx > 57: MUL TEMP[11].x, TEMP[8].xxxx, TEMP[11].xxxx > 58: ELSE :0 > 59: SSG TEMP[8].x, TEMP[8].xxxx > 60: MUL TEMP[11].x, IMM[0].yyyy, TEMP[8].xxxx > 61: ENDIF > 62: ADD TEMP[8].x, -TEMP[11].xxxx, IMM[0].zzzz > 63: ABS TEMP[8].x, TEMP[8].xxxx > 64: LG2 TEMP[8].x, TEMP[8].xxxx > 65: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][11].zzzz > 66: EX2 TEMP[8].x, TEMP[8].xxxx > 67: ADD TEMP[8].x, -TEMP[8].xxxx, IMM[0].zzzz > 68: MOV TEMP[3].z, 
TEMP[8].xxxx > 69: ADD TEMP[11].x, TEMP[1].xxxx, -TEMP[5].xxxx > 70: FMA TEMP[7].xyz, -TEMP[2].xyzz, TEMP[11].xxxx, -IN[3].xyzz > 71: ADD TEMP[9].x, TEMP[1].xxxx, -TEMP[9].xxxx > 72: FMA TEMP[2].xyz, -TEMP[2].xyzz, TEMP[9].xxxx, -IN[3].xyzz > 73: ADD TEMP[9].xyz, -IN[3].xyzz, IN[4].xyzz > 74: FSEQ TEMP[11].xyz, TEMP[9].xyzz, IMM[0].xxxx > 75: RCP TEMP[12].x, TEMP[9].xxxx > 76: RCP TEMP[12].y, TEMP[9].yyyy > 77: RCP TEMP[12].z, TEMP[9].zzzz > 78: UCMP TEMP[9].xyz, TEMP[11].xyzz, IMM[0].yyyy, TEMP[12].xyzz > 79: USNE TEMP[11].x, CONST[1][16].xxxx, IMM[1].xxxx > 80: UIF TEMP[11].xxxx :0 > 81: MOV TEMP[11].xy, TEMP[6].xyyy > 82: MOV TEMP[11].w, IMM[0].xxxx > 83: TXL TEMP[11].x, TEMP[11], SAMP[1], 2D > 84: FSLT TEMP[12].x, TEMP[11].xxxx, IMM[0].zzzz > 85: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx > 86: INEG TEMP[12].x, TEMP[12].xxxx > 87: USNE TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx > 88: UIF TEMP[12].xxxx :0 > 89: MOV TEMP[12].xy, TEMP[6].xyyy > 90: MOV TEMP[12].w, IMM[0].xxxx > 91: TXL TEMP[12].x, TEMP[12], SAMP[2], 2D > 92: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[12].xxxx > 93: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx > 94: ADD TEMP[0].x, -TEMP[5].xxxx, TEMP[1].xxxx > 95: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 96: FSNE TEMP[4].x, TEMP[10].xxxx, IMM[0].xxxx > 97: UIF TEMP[4].xxxx :0 > 98: RCP TEMP[4].x, TEMP[10].xxxx > 99: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx >100: ELSE :0 >101: SSG TEMP[1].x, TEMP[1].xxxx >102: MUL TEMP[4].x, IMM[0].yyyy, TEMP[1].xxxx >103: ENDIF >104: ADD TEMP[0].x, -TEMP[4].xxxx, IMM[0].zzzz >105: ABS TEMP[1].x, TEMP[0].xxxx >106: LG2 TEMP[1].x, TEMP[1].xxxx >107: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][11].zzzz >108: EX2 TEMP[1].x, TEMP[0].xxxx >109: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz >110: SQRT TEMP[3].x, TEMP[11].xxxx >111: ADD TEMP[1].x, -TEMP[0].xxxx, TEMP[8].xxxx >112: FMA TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[0].xxxx >113: MOV TEMP[3].z, TEMP[1].xxxx >114: ENDIF >115: ENDIF >116: FMA TEMP[1].xyz, TEMP[7].xyzz, TEMP[9].xyzz, IMM[0].wwww >117: FMA TEMP[0].xyz, TEMP[2].xyzz, TEMP[9].xyzz, IMM[0].wwww >118: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >119: SQRT TEMP[2].x, TEMP[2].xxxx >120: MOV TEMP[6].x, TEMP[2].xxxx >121: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz >122: SQRT TEMP[4].x, TEMP[4].xxxx >123: MOV TEMP[6].y, TEMP[4].xxxx >124: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[0].xyzz >125: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx >126: FSNE TEMP[4].x, TEMP[2].xxxx, IMM[0].xxxx >127: UIF TEMP[4].xxxx :0 >128: RCP TEMP[2].x, TEMP[2].xxxx >129: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx >130: ELSE :0 >131: SSG TEMP[1].x, TEMP[1].xxxx >132: MUL TEMP[2].x, IMM[0].yyyy, TEMP[1].xxxx >133: ENDIF >134: ABS TEMP[1].x, TEMP[2].xxxx >135: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[0].zzzz >136: SQRT TEMP[1].x, TEMP[1].xxxx >137: ABS TEMP[4].x, TEMP[2].xxxx >138: FMA TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx, IMM[3].yyyy >139: ABS TEMP[5].x, TEMP[2].xxxx >140: FMA TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx, IMM[3].zzzz >141: ABS TEMP[5].x, TEMP[2].xxxx >142: FMA TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx, IMM[3].wwww >143: MUL TEMP[5].x, TEMP[1].xxxx, TEMP[4].xxxx >144: FMA TEMP[5].x, TEMP[5].xxxx, IMM[4].xxxx, IMM[4].yyyy >145: FSLT TEMP[2].x, TEMP[2].xxxx, -TEMP[2].xxxx >146: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx >147: INEG TEMP[2].x, TEMP[2].xxxx >148: AND TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx >149: FMA TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx, TEMP[2].xxxx >150: MOV TEMP[6].z, TEMP[1].xxxx >151: MUL TEMP[0].xyz, TEMP[6].xyzz, IMM[4].zzww >152: MOV TEMP[1].xyz, TEMP[0].xyzz >153: TEX TEMP[1], 
TEMP[1], SAMP[3], 3D >154: MOV TEMP[0].xyz, TEMP[1].xyzx >155: MUL TEMP[1].x, TEMP[1].wwww, TEMP[3].zzzz >156: MOV TEMP[0].w, TEMP[1].xxxx >157: MUL TEMP[0], TEMP[0], CONST[1][25] >158: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz >159: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][24].xyzz >160: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][11].wwww >161: MOV TEMP[1].w, TEMP[1].xxxx >162: MOV TEMP[1].xyz, TEMP[0].xyzx >163: MOV OUT[0], TEMP[1] >164: END >radeonsi: Compiling shader 161 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 188) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 3 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 7 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0 > %56 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %57 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %56, i64 0, i64 11 > %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 > %59 = extractelement <8 x i32> %55, i32 
7 > %60 = extractelement <4 x i32> %58, i32 0 > %61 = and i32 %60, %59 > %62 = insertelement <4 x i32> %58, i32 %61, i32 0 > %63 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 > %65 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %66 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %65, i64 0, i64 15 > %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 > %68 = extractelement <8 x i32> %64, i32 7 > %69 = extractelement <4 x i32> %67, i32 0 > %70 = and i32 %69, %68 > %71 = insertelement <4 x i32> %67, i32 %70, i32 0 > %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %87 = fmul float %78, %78 > %88 = fmul float %79, %79 > %89 = fadd float %88, %87 > %90 = fmul float %80, %80 > %91 = fadd float %89, %90 > %92 = call float @llvm.sqrt.f32(float %91) > %93 = fcmp oeq float %92, 0.000000e+00 > %94 = fcmp oeq float %92, 0.000000e+00 > %95 = fcmp oeq float %92, 0.000000e+00 > %96 = fcmp ogt float %78, 0.000000e+00 > %97 = select i1 %96, float 1.000000e+00, float %78 > %98 = fcmp oge float %97, 0.000000e+00 > %99 = fcmp ogt float %79, 0.000000e+00 > %100 = select i1 %99, float 1.000000e+00, float %79 > %101 = fcmp oge float %100, 0.000000e+00 > %102 = fcmp ogt float %80, 0.000000e+00 > %103 = select i1 %102, float 1.000000e+00, float %80 > %104 = fcmp oge float %103, 0.000000e+00 > %.op = fmul float %97, 0x4600000000000000 > %105 = select i1 %98, float %.op, float 0xC600000000000000 > %.op67 = fmul float %100, 0x4600000000000000 > %106 = select i1 %101, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %103, 0x4600000000000000 > %107 = select i1 %104, float %.op68, float 0xC600000000000000 > %108 = fdiv float 1.000000e+00, %92 > %109 = fmul float %78, %108 > %110 = fmul float %79, %108 > %111 = fmul float %80, %108 > %112 = select i1 %93, float %105, float %109 > %113 = select i1 %94, float %106, float %110 > %114 = select i1 %95, float %107, float %111 > %115 = fmul float %112, %75 > %116 = fmul float %113, %76 > %117 = fadd float %116, %115 > %118 = fmul float %114, %77 > %119 = fadd float %117, %118 > %120 = fcmp une float %119, 0.000000e+00 > %121 = fdiv float 1.000000e+00, %119 > %temp16.0 = select i1 %120, float %121, float 0x4600000000000000 > %122 = fmul float %temp16.0, %35 > %123 = fcmp oeq float %112, 0.000000e+00 > %124 = fcmp oeq float %113, 0.000000e+00 > %125 = fcmp 
oeq float %114, 0.000000e+00 > %126 = fcmp ogt float %81, 0.000000e+00 > %127 = select i1 %126, float 1.000000e+00, float %81 > %128 = fcmp oge float %127, 0.000000e+00 > %129 = fcmp ogt float %82, 0.000000e+00 > %130 = select i1 %129, float 1.000000e+00, float %82 > %131 = fcmp oge float %130, 0.000000e+00 > %132 = fcmp ogt float %83, 0.000000e+00 > %133 = select i1 %132, float 1.000000e+00, float %83 > %134 = fcmp oge float %133, 0.000000e+00 > %.op69 = fmul float %127, 0x4600000000000000 > %135 = select i1 %128, float %.op69, float 0xC600000000000000 > %.op70 = fmul float %130, 0x4600000000000000 > %136 = select i1 %131, float %.op70, float 0xC600000000000000 > %.op71 = fmul float %133, 0x4600000000000000 > %137 = select i1 %134, float %.op71, float 0xC600000000000000 > %138 = fdiv float 1.000000e+00, %112 > %139 = fdiv float 1.000000e+00, %113 > %140 = fdiv float 1.000000e+00, %114 > %141 = fmul float %81, %138 > %142 = fmul float %82, %139 > %143 = fmul float %83, %140 > %144 = select i1 %123, float %135, float %141 > %145 = select i1 %124, float %136, float %142 > %146 = select i1 %125, float %137, float %143 > %147 = fcmp oeq float %112, 0.000000e+00 > %148 = fcmp oeq float %113, 0.000000e+00 > %149 = fcmp oeq float %114, 0.000000e+00 > %150 = fcmp ogt float %84, 0.000000e+00 > %151 = select i1 %150, float 1.000000e+00, float %84 > %152 = fcmp oge float %151, 0.000000e+00 > %153 = fcmp ogt float %85, 0.000000e+00 > %154 = select i1 %153, float 1.000000e+00, float %85 > %155 = fcmp oge float %154, 0.000000e+00 > %156 = fcmp ogt float %86, 0.000000e+00 > %157 = select i1 %156, float 1.000000e+00, float %86 > %158 = fcmp oge float %157, 0.000000e+00 > %.op72 = fmul float %151, 0x4600000000000000 > %159 = select i1 %152, float %.op72, float 0xC600000000000000 > %.op73 = fmul float %154, 0x4600000000000000 > %160 = select i1 %155, float %.op73, float 0xC600000000000000 > %.op74 = fmul float %157, 0x4600000000000000 > %161 = select i1 %158, float %.op74, float 0xC600000000000000 > %162 = fdiv float 1.000000e+00, %112 > %163 = fdiv float 1.000000e+00, %113 > %164 = fdiv float 1.000000e+00, %114 > %165 = fmul float %84, %162 > %166 = fmul float %85, %163 > %167 = fmul float %86, %164 > %168 = select i1 %147, float %159, float %165 > %169 = select i1 %148, float %160, float %166 > %170 = select i1 %149, float %161, float %167 > %171 = call float @llvm.minnum.f32(float %144, float %168) > %172 = call float @llvm.minnum.f32(float %145, float %169) > %173 = call float @llvm.minnum.f32(float %146, float %170) > %174 = call float @llvm.maxnum.f32(float %173, float %172) > %175 = call float @llvm.maxnum.f32(float %174, float %171) > %176 = fadd float %92, %175 > %177 = fcmp oeq float %74, 0.000000e+00 > %178 = fcmp oeq float %74, 0.000000e+00 > %179 = fcmp ogt float %72, 0.000000e+00 > %180 = select i1 %179, float 1.000000e+00, float %72 > %181 = fcmp oge float %180, 0.000000e+00 > %182 = fcmp ogt float %73, 0.000000e+00 > %183 = select i1 %182, float 1.000000e+00, float %73 > %184 = fcmp oge float %183, 0.000000e+00 > %.op75 = fmul float %180, 0x4600000000000000 > %185 = select i1 %181, float %.op75, float 0xC600000000000000 > %.op76 = fmul float %183, 0x4600000000000000 > %186 = select i1 %184, float %.op76, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %74 > %188 = fmul float %72, %187 > %189 = fmul float %73, %187 > %190 = select i1 %177, float %185, float %188 > %191 = select i1 %178, float %186, float %189 > %192 = bitcast float %190 to i32 > %193 = bitcast float %191 to i32 > 
%194 = insertelement <2 x i32> undef, i32 %192, i32 0 > %195 = insertelement <2 x i32> %194, i32 %193, i32 1 > %196 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %195, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %197 = extractelement <4 x float> %196, i32 0 > %198 = fmul float %temp16.0, %197 > %199 = call float @llvm.minnum.f32(float %92, float %198) > %200 = call float @llvm.maxnum.f32(float %122, float %176) > %201 = fsub float %199, %200 > %202 = call float @llvm.maxnum.f32(float %201, float 0.000000e+00) > %203 = fsub float %81, %84 > %204 = fsub float %82, %85 > %205 = fsub float %83, %86 > %206 = fmul float %203, %203 > %207 = fmul float %204, %204 > %208 = fadd float %207, %206 > %209 = fmul float %205, %205 > %210 = fadd float %208, %209 > %211 = call float @llvm.sqrt.f32(float %210) > %212 = fcmp une float %211, 0.000000e+00 > br i1 %212, label %IF53, label %ELSE54 > >IF53: ; preds = %main_body > %213 = fdiv float 1.000000e+00, %211 > %214 = fmul float %202, %213 > br label %ENDIF52 > >ELSE54: ; preds = %main_body > %215 = fcmp ogt float %202, 0.000000e+00 > %216 = select i1 %215, float 1.000000e+00, float %202 > %217 = fcmp oge float %216, 0.000000e+00 > %.op77 = fmul float %216, 0x4600000000000000 > %218 = select i1 %217, float %.op77, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp44.0 = phi float [ %214, %IF53 ], [ %218, %ELSE54 ] > %219 = fsub float 1.000000e+00, %temp44.0 > %220 = call float @llvm.fabs.f32(float %219) > %221 = call float @llvm.log2.f32(float %220) > %222 = fmul float %221, %25 > %223 = call float @llvm.exp2.f32(float %222) > %224 = fsub float 1.000000e+00, %223 > %225 = fsub float %92, %200 > %226 = fsub float -0.000000e+00, %112 > %227 = fsub float -0.000000e+00, %81 > %228 = call float @llvm.fma.f32(float %226, float %225, float %227) > %229 = fsub float -0.000000e+00, %113 > %230 = fsub float -0.000000e+00, %82 > %231 = call float @llvm.fma.f32(float %229, float %225, float %230) > %232 = fsub float -0.000000e+00, %114 > %233 = fsub float -0.000000e+00, %83 > %234 = call float @llvm.fma.f32(float %232, float %225, float %233) > %235 = fsub float %92, %199 > %236 = fsub float -0.000000e+00, %112 > %237 = fsub float -0.000000e+00, %81 > %238 = call float @llvm.fma.f32(float %236, float %235, float %237) > %239 = fsub float -0.000000e+00, %113 > %240 = fsub float -0.000000e+00, %82 > %241 = call float @llvm.fma.f32(float %239, float %235, float %240) > %242 = fsub float -0.000000e+00, %114 > %243 = fsub float -0.000000e+00, %83 > %244 = call float @llvm.fma.f32(float %242, float %235, float %243) > %245 = fsub float %84, %81 > %246 = fsub float %85, %82 > %247 = fsub float %86, %83 > %248 = fcmp oeq float %245, 0.000000e+00 > %249 = fcmp oeq float %246, 0.000000e+00 > %250 = fcmp oeq float %247, 0.000000e+00 > %251 = fdiv float 1.000000e+00, %245 > %252 = fdiv float 1.000000e+00, %246 > %253 = fdiv float 1.000000e+00, %247 > %254 = select i1 %248, float 0x4600000000000000, float %251 > %255 = select i1 %249, float 0x4600000000000000, float %252 > %256 = select i1 %250, float 0x4600000000000000, float %253 > %257 = bitcast float %27 to i32 > %258 = icmp eq i32 %257, 0 > br i1 %258, label %ENDIF55, label %IF56 > >IF56: ; preds = %ENDIF52 > %259 = bitcast float %190 to i32 > %260 = bitcast float %191 to i32 > %261 = insertelement <4 x i32> undef, i32 %259, i32 0 > %262 = insertelement <4 x i32> %261, i32 %260, i32 1 > %263 = insertelement <4 x i32> %262, i32 0, 
i32 2 > %264 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %263, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %265 = extractelement <4 x float> %264, i32 0 > %266 = fcmp olt float %265, 1.000000e+00 > br i1 %266, label %IF59, label %ENDIF55 > >ENDIF55: ; preds = %ENDIF52, %ENDIF61, %IF56 > %temp14.0 = phi float [ %224, %ENDIF52 ], [ %318, %ENDIF61 ], [ %224, %IF56 ] > %267 = call float @llvm.fma.f32(float %228, float %254, float -5.000000e-01) > %268 = call float @llvm.fma.f32(float %231, float %255, float -5.000000e-01) > %269 = call float @llvm.fma.f32(float %234, float %256, float -5.000000e-01) > %270 = call float @llvm.fma.f32(float %238, float %254, float -5.000000e-01) > %271 = call float @llvm.fma.f32(float %241, float %255, float -5.000000e-01) > %272 = call float @llvm.fma.f32(float %244, float %256, float -5.000000e-01) > %273 = fmul float %267, %267 > %274 = fmul float %268, %268 > %275 = fadd float %274, %273 > %276 = fmul float %269, %269 > %277 = fadd float %275, %276 > %278 = call float @llvm.sqrt.f32(float %277) > %279 = fmul float %270, %270 > %280 = fmul float %271, %271 > %281 = fadd float %280, %279 > %282 = fmul float %272, %272 > %283 = fadd float %281, %282 > %284 = call float @llvm.sqrt.f32(float %283) > %285 = fmul float %267, %270 > %286 = fmul float %268, %271 > %287 = fadd float %286, %285 > %288 = fmul float %269, %272 > %289 = fadd float %287, %288 > %290 = fmul float %284, %278 > %291 = fcmp une float %290, 0.000000e+00 > br i1 %291, label %IF65, label %ELSE66 > >IF59: ; preds = %IF56 > %292 = bitcast float %190 to i32 > %293 = bitcast float %191 to i32 > %294 = insertelement <4 x i32> undef, i32 %292, i32 0 > %295 = insertelement <4 x i32> %294, i32 %293, i32 1 > %296 = insertelement <4 x i32> %295, i32 0, i32 2 > %297 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %296, <8 x i32> %55, <4 x i32> %62, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %298 = extractelement <4 x float> %297, i32 0 > %299 = fmul float %temp16.0, %298 > %300 = call float @llvm.minnum.f32(float %92, float %299) > %301 = fsub float %300, %200 > %302 = call float @llvm.maxnum.f32(float %301, float 0.000000e+00) > %303 = fcmp une float %211, 0.000000e+00 > br i1 %303, label %IF62, label %ELSE63 > >IF62: ; preds = %IF59 > %304 = fdiv float 1.000000e+00, %211 > %305 = fmul float %302, %304 > br label %ENDIF61 > >ELSE63: ; preds = %IF59 > %306 = fcmp ogt float %302, 0.000000e+00 > %307 = select i1 %306, float 1.000000e+00, float %302 > %308 = fcmp oge float %307, 0.000000e+00 > %.op79 = fmul float %307, 0x4600000000000000 > %309 = select i1 %308, float %.op79, float 0xC600000000000000 > br label %ENDIF61 > >ENDIF61: ; preds = %ELSE63, %IF62 > %temp16.1 = phi float [ %305, %IF62 ], [ %309, %ELSE63 ] > %310 = fsub float 1.000000e+00, %temp16.1 > %311 = call float @llvm.fabs.f32(float %310) > %312 = call float @llvm.log2.f32(float %311) > %313 = fmul float %312, %25 > %314 = call float @llvm.exp2.f32(float %313) > %315 = fsub float 1.000000e+00, %314 > %316 = call float @llvm.sqrt.f32(float %265) > %317 = fsub float %224, %315 > %318 = call float @llvm.fma.f32(float %316, float %317, float %315) > br label %ENDIF55 > >IF65: ; preds = %ENDIF55 > %319 = fdiv float 1.000000e+00, %290 > %320 = fmul float %289, %319 > br label %ENDIF64 > >ELSE66: ; preds = %ENDIF55 > %321 = fcmp ogt float %289, 0.000000e+00 > %322 = select i1 %321, float 1.000000e+00, float %289 > %323 = fcmp oge float %322, 0.000000e+00 > %.op78 = 
fmul float %322, 0x4600000000000000 > %324 = select i1 %323, float %.op78, float 0xC600000000000000 > br label %ENDIF64 > >ENDIF64: ; preds = %ELSE66, %IF65 > %temp8.0 = phi float [ %320, %IF65 ], [ %324, %ELSE66 ] > %325 = call float @llvm.fabs.f32(float %temp8.0) > %326 = fsub float 1.000000e+00, %325 > %327 = call float @llvm.sqrt.f32(float %326) > %328 = call float @llvm.fabs.f32(float %temp8.0) > %329 = call float @llvm.fma.f32(float %328, float 0xBF932DC600000000, float 0x3FB302C4E0000000) > %330 = call float @llvm.fabs.f32(float %temp8.0) > %331 = call float @llvm.fma.f32(float %329, float %330, float 0xBFCB269080000000) > %332 = call float @llvm.fabs.f32(float %temp8.0) > %333 = call float @llvm.fma.f32(float %331, float %332, float 0x3FF921B480000000) > %334 = fmul float %327, %333 > %335 = call float @llvm.fma.f32(float %334, float -2.000000e+00, float 0x400921FB60000000) > %336 = fsub float -0.000000e+00, %temp8.0 > %337 = fcmp olt float %temp8.0, %336 > %338 = select i1 %337, float %335, float 0.000000e+00 > %339 = call float @llvm.fma.f32(float %333, float %327, float %338) > %340 = fmul float %278, 0x3FF279A740000000 > %341 = fmul float %284, 0x3FF279A740000000 > %342 = fmul float %339, 0x3FD45F3060000000 > %343 = bitcast float %340 to i32 > %344 = bitcast float %341 to i32 > %345 = bitcast float %342 to i32 > %346 = insertelement <4 x i32> undef, i32 %343, i32 0 > %347 = insertelement <4 x i32> %346, i32 %344, i32 1 > %348 = insertelement <4 x i32> %347, i32 %345, i32 2 > %349 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %348, <8 x i32> %64, <4 x i32> %71, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %350 = extractelement <4 x float> %349, i32 0 > %351 = extractelement <4 x float> %349, i32 1 > %352 = extractelement <4 x float> %349, i32 2 > %353 = extractelement <4 x float> %349, i32 3 > %354 = fmul float %353, %temp14.0 > %355 = fmul float %350, %31 > %356 = fmul float %351, %32 > %357 = fmul float %352, %33 > %358 = fmul float %354, %34 > %359 = fmul float %358, %355 > %360 = fmul float %358, %356 > %361 = fmul float %358, %357 > %362 = fmul float %359, %28 > %363 = fmul float %360, %29 > %364 = fadd float %363, %362 > %365 = fmul float %361, %30 > %366 = fadd float %364, %365 > %367 = fmul float %366, %26 > %368 = bitcast float %5 to i32 > %369 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %368, 10 > %370 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %369, float %359, 11 > %371 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %370, float %360, 12 > %372 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %371, float %361, 13 > %373 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %372, float %367, 14 > %374 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %373, float %21, 24 > ret <{ i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %374 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..11] >DCL CONST[2][0..4095] >DCL CONST[3][0..24] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {48, 64, 2, 384} >IMM[4] UINT32 {128, 144, 160, 176} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 
31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: 
FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR 
TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], 
TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, 
TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][1], TEMP[18] >356: DP4 TEMP[3].x, CONST[1][2], TEMP[18] >357: MOV TEMP[1].y, TEMP[3].xxxx >358: DP4 TEMP[3].x, CONST[1][3], TEMP[18] >359: MOV TEMP[1].z, TEMP[3].xxxx >360: DP4 TEMP[3].x, CONST[1][4], TEMP[18] >361: MOV TEMP[1].w, TEMP[3].xxxx >362: MOV TEMP[3], TEMP[1] >363: MOV TEMP[5].xy, IN[2].xyxx >364: DP3 TEMP[6].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[6].xxxx >366: DP3 TEMP[6].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[6].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[6].x, TEMP[6].xxxx >371: MUL TEMP[1].xyz, TEMP[6].xxxx, TEMP[1].xyzz >372: DP3 TEMP[6].x, CONST[1][8].xyzz, TEMP[1].xyzz >373: DP3 TEMP[8].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[8].xxxx >375: DP3 TEMP[8].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[8].xxxx >377: DP3 TEMP[8].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[8].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >387: MOV TEMP[6].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[2].xyzz >392: MOV TEMP[6].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][9].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][10].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][10].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][10].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MUL TEMP[0], IN[5], CONST[1][11] >404: MOV OUT[5], TEMP[0] >405: MOV OUT[4], TEMP[1] >406: MOV OUT[3], TEMP[4] >407: MOV OUT[2], TEMP[6] >408: MOV OUT[1], TEMP[5] >409: MOV OUT[0], TEMP[3] >410: END >radeonsi: Compiling shader 162 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %28 = call float @llvm.SI.load.const(<16 x i8> 
%22, i32 36) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 64) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 72) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 76) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176) > %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180) > %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184) > %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 188) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %13) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 > %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %14) > %63 = extractelement <4 x float> %62, i32 0 > %64 = extractelement <4 x float> %62, i32 1 > %65 = extractelement <4 x float> %62, i32 2 > %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 > %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %15) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %17) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %84 = load 
<16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %18) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %19) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 > %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %20) > %99 = extractelement <4 x float> %98, i32 0 > %100 = extractelement <4 x float> %98, i32 1 > %101 = extractelement <4 x float> %98, i32 2 > %102 = fmul float %101, 0x406FE01000000000 > %103 = fmul float %100, 0x406FE01000000000 > %104 = fmul float %99, 0x406FE01000000000 > %105 = fptosi float %102 to i32 > %106 = fptosi float %103 to i32 > %107 = fptosi float %104 to i32 > %108 = shl i32 %105, 1 > %109 = or i32 %108, 1 > %110 = shl i32 %106, 1 > %111 = or i32 %110, 1 > %112 = shl i32 %107, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %105, 5 > %115 = or i32 %114, 4 > %116 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %115) > %117 = fmul float %93, %116 > %118 = shl i32 %106, 5 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %119) > %121 = fmul float %94, %120 > %122 = shl i32 %109, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %122) > %124 = shl i32 %109, 4 > %125 = or i32 %124, 12 > %126 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %125) > %127 = fmul float %123, %126 > %128 = shl i32 %109, 4 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %129) > %131 = shl i32 %109, 4 > %132 = or i32 %131, 8 > %133 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %132) > %134 = fsub float -0.000000e+00, %127 > %135 = call float @llvm.fma.f32(float %130, float %133, float %134) > %136 = shl i32 %109, 4 > %137 = or i32 %136, 4 > %138 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %137) > %139 = shl i32 %109, 4 > %140 = or i32 %139, 8 > %141 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %140) > %142 = call float @llvm.fma.f32(float %138, float %141, float %127) > %143 = fmul float %142, %93 > %144 = fmul float %135, %93 > %145 = fmul float %144, 2.000000e+00 > %146 = shl i32 %111, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %146) > %148 = shl i32 %111, 4 > %149 = or i32 %148, 12 > %150 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %149) > %151 = fmul float %147, %150 > %152 = shl i32 %111, 4 > %153 = or i32 %152, 4 > %154 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %153) > %155 = shl i32 %111, 4 > %156 = or i32 %155, 8 > %157 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %156) > %158 = fsub float -0.000000e+00, %151 > %159 = call float @llvm.fma.f32(float %154, float %157, float %158) > %160 = shl i32 %111, 4 > %161 = or i32 %160, 4 > %162 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %161) > %163 = shl i32 %111, 4 > %164 = or i32 %163, 8 > %165 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %164) > %166 = call float 
@llvm.fma.f32(float %162, float %165, float %151) > %167 = fmul float %166, %94 > %168 = fmul float %167, 2.000000e+00 > %169 = fmul float %159, %94 > %170 = fmul float %169, 2.000000e+00 > %171 = shl i32 %109, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %172) > %174 = shl i32 %109, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %175) > %177 = shl i32 %109, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %177) > %179 = shl i32 %109, 4 > %180 = or i32 %179, 12 > %181 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %180) > %182 = fmul float %176, %181 > %183 = fmul float %176, %178 > %184 = fmul float %173, %181 > %185 = shl i32 %109, 4 > %186 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %185) > %187 = shl i32 %109, 4 > %188 = or i32 %187, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %188) > %190 = call float @llvm.fma.f32(float %186, float %189, float %182) > %191 = fmul float %190, %93 > %192 = fmul float %191, 2.000000e+00 > %193 = shl i32 %109, 4 > %194 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %193) > %195 = shl i32 %109, 4 > %196 = or i32 %195, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %196) > %198 = shl i32 %109, 4 > %199 = or i32 %198, 8 > %200 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %199) > %201 = shl i32 %109, 4 > %202 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %201) > %203 = shl i32 %109, 4 > %204 = or i32 %203, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %204) > %206 = shl i32 %109, 4 > %207 = or i32 %206, 8 > %208 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %207) > %209 = fmul float %194, %202 > %210 = fmul float %197, %205 > %211 = fmul float %200, %208 > %212 = fadd float %211, %210 > %213 = fadd float %211, %209 > %214 = fadd float %210, %209 > %215 = fsub float -0.000000e+00, %212 > %216 = call float @llvm.fma.f32(float %215, float 2.000000e+00, float 1.000000e+00) > %217 = fsub float -0.000000e+00, %213 > %218 = call float @llvm.fma.f32(float %217, float 2.000000e+00, float 1.000000e+00) > %219 = fsub float -0.000000e+00, %214 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fmul float %93, %218 > %222 = shl i32 %111, 4 > %223 = or i32 %222, 4 > %224 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %223) > %225 = shl i32 %111, 4 > %226 = or i32 %225, 8 > %227 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %226) > %228 = shl i32 %111, 4 > %229 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %228) > %230 = shl i32 %111, 4 > %231 = or i32 %230, 12 > %232 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %231) > %233 = fmul float %227, %232 > %234 = fmul float %227, %229 > %235 = fmul float %224, %232 > %236 = shl i32 %111, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %236) > %238 = shl i32 %111, 4 > %239 = or i32 %238, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %239) > %241 = call float @llvm.fma.f32(float %237, float %240, float %233) > %242 = fmul float %241, %94 > %243 = fmul float %242, 2.000000e+00 > %244 = shl i32 %111, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %244) > %246 = shl i32 %111, 4 > %247 = or i32 %246, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %247) > %249 = shl i32 %111, 4 > %250 = or i32 %249, 8 > %251 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %250) > %252 = shl i32 %111, 4 > %253 = call float @llvm.SI.load.const(<16 x i8> %53, 
i32 %252) > %254 = shl i32 %111, 4 > %255 = or i32 %254, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %255) > %257 = shl i32 %111, 4 > %258 = or i32 %257, 8 > %259 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %258) > %260 = fmul float %245, %253 > %261 = fmul float %248, %256 > %262 = fmul float %251, %259 > %263 = fadd float %262, %261 > %264 = fadd float %262, %260 > %265 = fadd float %261, %260 > %266 = fsub float -0.000000e+00, %263 > %267 = call float @llvm.fma.f32(float %266, float 2.000000e+00, float 1.000000e+00) > %268 = fsub float -0.000000e+00, %264 > %269 = call float @llvm.fma.f32(float %268, float 2.000000e+00, float 1.000000e+00) > %270 = fsub float -0.000000e+00, %265 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fmul float %94, %269 > %273 = fadd float %192, %243 > %274 = fadd float %221, %272 > %275 = fadd float %145, %170 > %276 = fadd float %117, %121 > %277 = shl i32 %107, 5 > %278 = or i32 %277, 4 > %279 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %278) > %280 = fmul float %95, %279 > %281 = shl i32 %113, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %281) > %283 = shl i32 %113, 4 > %284 = or i32 %283, 12 > %285 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %284) > %286 = fmul float %282, %285 > %287 = shl i32 %113, 4 > %288 = or i32 %287, 4 > %289 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %288) > %290 = shl i32 %113, 4 > %291 = or i32 %290, 8 > %292 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %291) > %293 = fsub float -0.000000e+00, %286 > %294 = call float @llvm.fma.f32(float %289, float %292, float %293) > %295 = shl i32 %113, 4 > %296 = or i32 %295, 4 > %297 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %296) > %298 = shl i32 %113, 4 > %299 = or i32 %298, 8 > %300 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %299) > %301 = call float @llvm.fma.f32(float %297, float %300, float %286) > %302 = fmul float %301, %95 > %303 = fmul float %302, 2.000000e+00 > %304 = fmul float %294, %95 > %305 = fmul float %304, 2.000000e+00 > %306 = shl i32 %113, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %307) > %309 = shl i32 %113, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %310) > %312 = shl i32 %113, 4 > %313 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %312) > %314 = shl i32 %113, 4 > %315 = or i32 %314, 12 > %316 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %315) > %317 = fmul float %311, %316 > %318 = fmul float %311, %313 > %319 = fmul float %308, %316 > %320 = shl i32 %113, 4 > %321 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %320) > %322 = shl i32 %113, 4 > %323 = or i32 %322, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %323) > %325 = call float @llvm.fma.f32(float %321, float %324, float %317) > %326 = fmul float %325, %95 > %327 = fmul float %326, 2.000000e+00 > %328 = shl i32 %113, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %328) > %330 = shl i32 %113, 4 > %331 = or i32 %330, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %331) > %333 = shl i32 %113, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %334) > %336 = shl i32 %113, 4 > %337 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %336) > %338 = shl i32 %113, 4 > %339 = or i32 %338, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %339) > %341 = shl i32 %113, 4 > %342 = or i32 %341, 8 > 
%343 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %342) > %344 = fmul float %329, %337 > %345 = fmul float %332, %340 > %346 = fmul float %335, %343 > %347 = fadd float %346, %345 > %348 = fadd float %346, %344 > %349 = fadd float %345, %344 > %350 = fsub float -0.000000e+00, %347 > %351 = call float @llvm.fma.f32(float %350, float 2.000000e+00, float 1.000000e+00) > %352 = fsub float -0.000000e+00, %348 > %353 = call float @llvm.fma.f32(float %352, float 2.000000e+00, float 1.000000e+00) > %354 = fsub float -0.000000e+00, %349 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fmul float %95, %353 > %357 = fadd float %273, %327 > %358 = fadd float %274, %356 > %359 = fadd float %275, %305 > %360 = fadd float %276, %280 > %361 = fmul float %357, %57 > %362 = fmul float %358, %58 > %363 = fadd float %361, %362 > %364 = fmul float %359, %59 > %365 = fadd float %363, %364 > %366 = fadd float %365, %360 > %367 = shl i32 %109, 4 > %368 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %367) > %369 = shl i32 %109, 4 > %370 = or i32 %369, 8 > %371 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %370) > %372 = fsub float -0.000000e+00, %184 > %373 = call float @llvm.fma.f32(float %368, float %371, float %372) > %374 = fmul float %373, %93 > %375 = fmul float %374, 2.000000e+00 > %376 = fmul float %143, 2.000000e+00 > %377 = shl i32 %111, 4 > %378 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %377) > %379 = shl i32 %111, 4 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %380) > %382 = fsub float -0.000000e+00, %235 > %383 = call float @llvm.fma.f32(float %378, float %381, float %382) > %384 = fmul float %383, %94 > %385 = fmul float %384, 2.000000e+00 > %386 = fmul float %93, %220 > %387 = fmul float %93, %216 > %388 = fmul float %94, %271 > %389 = fmul float %94, %267 > %390 = shl i32 %105, 5 > %391 = or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %391) > %393 = fmul float %93, %392 > %394 = shl i32 %106, 5 > %395 = or i32 %394, 8 > %396 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %395) > %397 = fmul float %94, %396 > %398 = fadd float %385, %375 > %399 = fadd float %168, %376 > %400 = fadd float %388, %386 > %401 = fadd float %397, %393 > %402 = shl i32 %113, 4 > %403 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %402) > %404 = shl i32 %113, 4 > %405 = or i32 %404, 8 > %406 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %405) > %407 = fsub float -0.000000e+00, %319 > %408 = call float @llvm.fma.f32(float %403, float %406, float %407) > %409 = fmul float %408, %95 > %410 = fmul float %409, 2.000000e+00 > %411 = fmul float %95, %355 > %412 = fmul float %95, %351 > %413 = shl i32 %107, 5 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %414) > %416 = fmul float %95, %415 > %417 = fadd float %398, %410 > %418 = fadd float %399, %303 > %419 = fadd float %400, %411 > %420 = fadd float %401, %416 > %421 = fmul float %417, %57 > %422 = fmul float %418, %58 > %423 = fadd float %421, %422 > %424 = fmul float %419, %59 > %425 = fadd float %423, %424 > %426 = fadd float %425, %420 > %427 = shl i32 %105, 5 > %428 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %427) > %429 = fmul float %93, %428 > %430 = shl i32 %106, 5 > %431 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %430) > %432 = fmul float %94, %431 > %433 = shl i32 %107, 5 > %434 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %433) > %435 = fmul float %95, %434 
> %436 = shl i32 %109, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %436) > %438 = shl i32 %109, 4 > %439 = or i32 %438, 4 > %440 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %439) > %441 = fsub float -0.000000e+00, %182 > %442 = call float @llvm.fma.f32(float %437, float %440, float %441) > %443 = fadd float %184, %183 > %444 = fmul float %442, %93 > %445 = fmul float %443, %93 > %446 = fmul float %444, 2.000000e+00 > %447 = fmul float %445, 2.000000e+00 > %448 = shl i32 %111, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %448) > %450 = shl i32 %111, 4 > %451 = or i32 %450, 4 > %452 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %451) > %453 = fsub float -0.000000e+00, %233 > %454 = call float @llvm.fma.f32(float %449, float %452, float %453) > %455 = shl i32 %113, 4 > %456 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %455) > %457 = shl i32 %113, 4 > %458 = or i32 %457, 4 > %459 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %458) > %460 = fsub float -0.000000e+00, %317 > %461 = call float @llvm.fma.f32(float %456, float %459, float %460) > %462 = fadd float %319, %318 > %463 = fmul float %454, %94 > %464 = fmul float %461, %95 > %465 = fmul float %462, %95 > %466 = fmul float %464, 2.000000e+00 > %467 = fmul float %465, 2.000000e+00 > %468 = fadd float %235, %234 > %469 = fmul float %468, %94 > %470 = fmul float %463, 2.000000e+00 > %471 = fmul float %469, 2.000000e+00 > %472 = fadd float %387, %389 > %473 = fadd float %446, %470 > %474 = fadd float %447, %471 > %475 = fadd float %429, %432 > %476 = fadd float %412, %472 > %477 = fadd float %466, %473 > %478 = fadd float %467, %474 > %479 = fadd float %435, %475 > %480 = fmul float %476, %57 > %481 = fmul float %477, %58 > %482 = fadd float %480, %481 > %483 = fmul float %478, %59 > %484 = fadd float %482, %483 > %485 = fadd float %484, %479 > %486 = fmul float %23, %485 > %487 = fmul float %24, %366 > %488 = fadd float %486, %487 > %489 = fmul float %25, %426 > %490 = fadd float %488, %489 > %491 = fadd float %490, %26 > %492 = fmul float %27, %485 > %493 = fmul float %28, %366 > %494 = fadd float %492, %493 > %495 = fmul float %29, %426 > %496 = fadd float %494, %495 > %497 = fadd float %496, %30 > %498 = fmul float %31, %485 > %499 = fmul float %32, %366 > %500 = fadd float %498, %499 > %501 = fmul float %33, %426 > %502 = fadd float %500, %501 > %503 = fadd float %502, %34 > %504 = fmul float %35, %485 > %505 = fmul float %36, %366 > %506 = fadd float %504, %505 > %507 = fmul float %37, %426 > %508 = fadd float %506, %507 > %509 = fadd float %508, %38 > %510 = fmul float %357, %74 > %511 = fmul float %358, %75 > %512 = fadd float %511, %510 > %513 = fmul float %359, %76 > %514 = fadd float %512, %513 > %515 = fmul float %417, %74 > %516 = fmul float %418, %75 > %517 = fadd float %516, %515 > %518 = fmul float %419, %76 > %519 = fadd float %517, %518 > %520 = fmul float %476, %74 > %521 = fmul float %477, %75 > %522 = fadd float %521, %520 > %523 = fmul float %478, %76 > %524 = fadd float %522, %523 > %525 = fmul float %524, %524 > %526 = fmul float %514, %514 > %527 = fadd float %526, %525 > %528 = fmul float %519, %519 > %529 = fadd float %527, %528 > %530 = call float @llvm.AMDGPU.rsq.clamped.f32(float %529) > %531 = fmul float %530, %524 > %532 = fmul float %530, %514 > %533 = fmul float %530, %519 > %534 = fmul float %39, %531 > %535 = fmul float %40, %532 > %536 = fadd float %535, %534 > %537 = fmul float %41, %533 > %538 = fadd float %536, %537 > %539 = fmul float %357, %80 
> %540 = fmul float %358, %81 > %541 = fadd float %540, %539 > %542 = fmul float %359, %82 > %543 = fadd float %541, %542 > %544 = fmul float %357, %63 > %545 = fmul float %358, %64 > %546 = fadd float %545, %544 > %547 = fmul float %359, %65 > %548 = fadd float %546, %547 > %549 = fmul float %417, %80 > %550 = fmul float %418, %81 > %551 = fadd float %550, %549 > %552 = fmul float %419, %82 > %553 = fadd float %551, %552 > %554 = fmul float %417, %63 > %555 = fmul float %418, %64 > %556 = fadd float %555, %554 > %557 = fmul float %419, %65 > %558 = fadd float %556, %557 > %559 = fmul float %476, %80 > %560 = fmul float %477, %81 > %561 = fadd float %560, %559 > %562 = fmul float %478, %82 > %563 = fadd float %561, %562 > %564 = fmul float %476, %63 > %565 = fmul float %477, %64 > %566 = fadd float %565, %564 > %567 = fmul float %478, %65 > %568 = fadd float %566, %567 > %569 = fmul float %563, %563 > %570 = fmul float %543, %543 > %571 = fadd float %570, %569 > %572 = fmul float %553, %553 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %563 > %576 = fmul float %574, %543 > %577 = fmul float %574, %553 > %578 = fmul float %39, %575 > %579 = fmul float %40, %576 > %580 = fadd float %579, %578 > %581 = fmul float %41, %577 > %582 = fadd float %580, %581 > %583 = fmul float %568, %568 > %584 = fmul float %548, %548 > %585 = fadd float %584, %583 > %586 = fmul float %558, %558 > %587 = fadd float %585, %586 > %588 = call float @llvm.AMDGPU.rsq.clamped.f32(float %587) > %589 = fmul float %588, %568 > %590 = fmul float %588, %548 > %591 = fmul float %588, %558 > %592 = fmul float %39, %589 > %593 = fmul float %40, %590 > %594 = fadd float %593, %592 > %595 = fmul float %41, %591 > %596 = fadd float %594, %595 > %597 = fmul float %42, %531 > %598 = fmul float %43, %532 > %599 = fadd float %598, %597 > %600 = fmul float %44, %533 > %601 = fadd float %599, %600 > %602 = fmul float %45, %531 > %603 = fmul float %46, %532 > %604 = fadd float %603, %602 > %605 = fmul float %47, %533 > %606 = fadd float %604, %605 > %607 = fmul float %42, %575 > %608 = fmul float %43, %576 > %609 = fadd float %608, %607 > %610 = fmul float %44, %577 > %611 = fadd float %609, %610 > %612 = fmul float %45, %575 > %613 = fmul float %46, %576 > %614 = fadd float %613, %612 > %615 = fmul float %47, %577 > %616 = fadd float %614, %615 > %617 = fmul float %42, %589 > %618 = fmul float %43, %590 > %619 = fadd float %618, %617 > %620 = fmul float %44, %591 > %621 = fadd float %619, %620 > %622 = fmul float %45, %589 > %623 = fmul float %46, %590 > %624 = fadd float %623, %622 > %625 = fmul float %47, %591 > %626 = fadd float %624, %625 > %627 = fmul float %86, %48 > %628 = fmul float %87, %49 > %629 = fmul float %88, %50 > %630 = fmul float %89, %51 > %631 = bitcast i32 %11 to float > %632 = insertvalue <{ float, float, float }> undef, float %631, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %69, float %70, float %59, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %538, float %582, float %596, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %601, float %611, float %621, float %360) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %606, float %616, float %626, float %509) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %627, float %628, float %629, float %630) > call void @llvm.SI.export(i32 15, i32 0, i32 1, 
i32 12, i32 0, float %491, float %497, float %503, float %509) > ret <{ float, float, float }> %632 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 368, 384, 0} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, 2.0000} >IMM[3] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} > 0: ADD TEMP[0].x, CONST[1][23].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[1][23].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[1].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[1][23].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[1][23].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx > 17: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 18: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx > 19: LG2 TEMP[1].x, TEMP[2].xxxx > 20: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][23].zzzz > 21: EX2 TEMP[1].x, TEMP[1].xxxx > 22: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][0].wwww > 23: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[1][0].xyzz > 24: MOV TEMP[2].xy, IN[0].xyyy > 25: TEX TEMP[2], TEMP[2], SAMP[0], 2D > 26: MUL TEMP[3].x, TEMP[2].wwww, CONST[1][23].wwww > 27: MOV TEMP[4].xyz, TEMP[2].xyzx > 28: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx > 29: MOV TEMP[1].w, TEMP[0].xxxx > 30: MOV TEMP[3].w, TEMP[0].xxxx > 31: MOV TEMP[5].xy, IN[0].xyyy > 32: TEX TEMP[5].yw, TEMP[5], SAMP[1], 2D > 33: FMA TEMP[5].xy, TEMP[5].ywww, IMM[2].wwww, IMM[1].zzzz > 34: MOV TEMP[2].xy, TEMP[5].xyxx > 35: FMA TEMP[6].x, -TEMP[5].xxxx, TEMP[5].xxxx, IMM[1].xxxx > 36: FMA TEMP[5].x, -TEMP[5].yyyy, TEMP[5].yyyy, TEMP[6].xxxx > 37: SQRT TEMP[5].x, TEMP[5].xxxx > 38: MOV TEMP[2].z, TEMP[5].xxxx > 39: DP3 TEMP[5].x, IN[1].xyzz, TEMP[2].xyzz > 40: DP3 TEMP[6].x, IN[2].xyzz, TEMP[2].xyzz > 41: MOV TEMP[5].y, TEMP[6].xxxx > 42: DP3 TEMP[2].x, IN[3].xyzz, TEMP[2].xyzz > 43: MOV TEMP[5].z, TEMP[2].xxxx > 44: DP3 TEMP[2].x, TEMP[5].xyzz, TEMP[5].xyzz > 45: RSQ TEMP[2].x, 
TEMP[2].xxxx > 46: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[5].xyzz > 47: FMA TEMP[3].xyz, TEMP[2].xyzz, IMM[3].xxxx, IMM[3].xxxx > 48: MOV TEMP[4].w, TEMP[0].xxxx > 49: MOV TEMP[0].w, TEMP[0].xxxx > 50: MOV TEMP[2].xy, IN[0].xyyy > 51: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D > 52: MUL TEMP[0].x, TEMP[2].zzzz, CONST[1][24].xxxx > 53: MOV TEMP[0].yz, TEMP[2].xyxx > 54: MOV OUT[0], TEMP[1] > 55: MOV OUT[1], TEMP[3] > 56: MOV OUT[2], TEMP[4] > 57: MOV OUT[3], TEMP[0] > 58: END >radeonsi: Compiling shader 163 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = 
insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %73 = fadd float %30, 1.000000e+00 > %74 = fsub float 1.000000e+00, %72 > %75 = fsub float 0x3FEFD70A40000000, %72 > %76 = call float @llvm.fma.f32(float %29, float %73, float %74) > %77 = call float @llvm.ceil.f32(float %75) > %78 = call float @llvm.AMDGPU.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) > %79 = fadd float %76, -1.000000e+00 > %80 = fcmp une float %30, 0.000000e+00 > %81 = fdiv float 1.000000e+00, %30 > %temp8.0 = select i1 %80, float %81, float 0x4600000000000000 > %82 = fmul float %temp8.0, %79 > %83 = call float @llvm.AMDGPU.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) > %84 = call float @llvm.fma.f32(float %83, float -2.000000e+00, float 3.000000e+00) > %85 = fmul float %83, %83 > %86 = fmul float %85, %84 > %87 = fsub float -0.000000e+00, %78 > %88 = call float @llvm.fma.f32(float %87, float %86, float 1.000000e+00) > %89 = fmul float %86, %78 > %90 = call float @llvm.log2.f32(float %88) > %91 = fmul float %90, %31 > %92 = call float @llvm.exp2.f32(float %91) > %93 = fmul float %92, %28 > %94 = fmul float %93, %25 > %95 = fmul float %93, %26 > %96 = fmul float %93, %27 > %97 = bitcast float %61 to i32 > %98 = bitcast float %62 to i32 > %99 = insertelement <2 x i32> undef, i32 %97, i32 0 > %100 = insertelement <2 x i32> %99, i32 %98, i32 1 > %101 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %100, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %102 = extractelement <4 x float> %101, i32 0 > %103 = extractelement <4 x float> %101, i32 1 > %104 = extractelement <4 x float> %101, i32 2 > %105 = extractelement <4 x float> %101, i32 3 > %106 = fmul float %105, %32 > %107 = fmul float %89, %106 > %108 = bitcast float %61 to i32 > %109 = bitcast float %62 to i32 > %110 = insertelement <2 x i32> undef, i32 %108, i32 0 > %111 = insertelement <2 x i32> %110, i32 %109, i32 1 > %112 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %111, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %113 = extractelement <4 x float> %112, i32 1 > %114 = extractelement <4 x float> %112, i32 3 > %115 = call float @llvm.fma.f32(float %113, float 2.000000e+00, float -1.000000e+00) > %116 = call float @llvm.fma.f32(float %114, float 2.000000e+00, float -1.000000e+00) > %117 = fsub float -0.000000e+00, %115 > %118 = call float @llvm.fma.f32(float %117, float %115, float 1.000000e+00) > %119 = fsub float -0.000000e+00, %116 > %120 = call float @llvm.fma.f32(float %119, float %116, float %118) > %121 = call float @llvm.sqrt.f32(float %120) > %122 = fmul float %63, 
%115 > %123 = fmul float %64, %116 > %124 = fadd float %123, %122 > %125 = fmul float %65, %121 > %126 = fadd float %124, %125 > %127 = fmul float %66, %115 > %128 = fmul float %67, %116 > %129 = fadd float %128, %127 > %130 = fmul float %68, %121 > %131 = fadd float %129, %130 > %132 = fmul float %69, %115 > %133 = fmul float %70, %116 > %134 = fadd float %133, %132 > %135 = fmul float %71, %121 > %136 = fadd float %134, %135 > %137 = fmul float %126, %126 > %138 = fmul float %131, %131 > %139 = fadd float %138, %137 > %140 = fmul float %136, %136 > %141 = fadd float %139, %140 > %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) > %143 = fmul float %142, %126 > %144 = fmul float %142, %131 > %145 = fmul float %142, %136 > %146 = call float @llvm.fma.f32(float %143, float 5.000000e-01, float 5.000000e-01) > %147 = call float @llvm.fma.f32(float %144, float 5.000000e-01, float 5.000000e-01) > %148 = call float @llvm.fma.f32(float %145, float 5.000000e-01, float 5.000000e-01) > %149 = bitcast float %61 to i32 > %150 = bitcast float %62 to i32 > %151 = insertelement <2 x i32> undef, i32 %149, i32 0 > %152 = insertelement <2 x i32> %151, i32 %150, i32 1 > %153 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %152, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %154 = extractelement <4 x float> %153, i32 0 > %155 = extractelement <4 x float> %153, i32 1 > %156 = extractelement <4 x float> %153, i32 2 > %157 = fmul float %156, %33 > %158 = bitcast float %5 to i32 > %159 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %158, 10 > %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %159, float %94, 11 > %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %95, 12 > %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %96, 13 > %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162, float %107, 14 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %163, float %146, 15 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %147, 16 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %148, 17 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %107, 18 > %168 = insertvalue <{ i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %102, 19 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %103, 20 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %104, 21 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %107, 22 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %157, 23 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %155, 24 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %154, 25 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float %107, 26 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..30] >DCL TEMP[0..9], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.5000, 0.0087} >IMM[1] UINT32 {0, 288, 304, 320} >IMM[2] UINT32 {336, 448, 368, 480} >IMM[3] UINT32 {352, 384, 432, 176} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][18], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[3], TEMP[1] > 10: MOV TEMP[4].zw, TEMP[1].wwzw > 11: MUL TEMP[0].xy, TEMP[2].xxxx, CONST[1][28].xyyy > 12: MUL TEMP[2].xy, CONST[1][28].xyyy, IMM[0].xyyy > 13: FMA TEMP[4].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[0].xyyy > 14: MUL TEMP[0].xyz, CONST[1][23].xyzz, CONST[1][30].yyyy > 15: FMA TEMP[0].xyz, CONST[1][22].xyzz, CONST[1][30].xxxx, TEMP[0].xyzz > 16: FMA TEMP[2].xyz, CONST[1][24].xyzz, CONST[1][30].zzzz, TEMP[0].xyzz > 17: MUL TEMP[5].x, TEMP[2].yyyy, IN[1].yyyy > 18: FMA TEMP[5].x, TEMP[2].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 19: FMA TEMP[5].x, -TEMP[2].zzzz, IN[1].zzzz, TEMP[5].xxxx > 20: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 21: DP2 TEMP[6].x, TEMP[2].zxxx, IN[1].ywww > 22: FMA TEMP[6].x, -TEMP[2].yyyy, IN[1].zzzz, TEMP[6].xxxx > 23: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 24: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 25: MUL TEMP[7].x, TEMP[2].zzzz, IN[1].xxxx > 26: FMA TEMP[7].x, TEMP[2].yyyy, IN[1].wwww, -TEMP[7].xxxx > 27: FMA TEMP[7].x, TEMP[2].xxxx, IN[1].zzzz, TEMP[7].xxxx > 28: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 29: DP2 TEMP[8].x, TEMP[2].yzzz, IN[1].xwww > 30: FMA TEMP[2].x, -TEMP[2].xxxx, IN[1].yyyy, TEMP[8].xxxx > 31: FMA TEMP[8].x, -TEMP[2].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 32: FMA TEMP[9].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 33: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 34: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 35: FMA TEMP[6].x, TEMP[7].xxxx, IN[1].wwww, TEMP[9].xxxx > 36: FMA TEMP[6].x, TEMP[2].xxxx, -IN[1].xxxx, TEMP[6].xxxx > 37: MOV TEMP[8].y, TEMP[6].xxxx > 38: FMA TEMP[2].x, TEMP[2].xxxx, IN[1].wwww, TEMP[5].xxxx > 39: MOV TEMP[8].z, TEMP[2].xxxx > 40: DP3 TEMP[0].x, TEMP[8].xyzz, TEMP[8].xyzz > 41: RSQ TEMP[2].x, TEMP[0].xxxx > 42: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[8].xyzz > 43: ADD TEMP[0].xyz, IN[0].xyzz, -CONST[1][27].xyzz > 44: MUL TEMP[5].x, TEMP[0].yyyy, IN[1].yyyy > 45: FMA TEMP[5].x, TEMP[0].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 46: FMA TEMP[5].x, -TEMP[0].zzzz, IN[1].zzzz, TEMP[5].xxxx > 47: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 48: DP2 TEMP[6].x, TEMP[0].zxxx, IN[1].ywww > 49: FMA TEMP[6].x, -TEMP[0].yyyy, IN[1].zzzz, TEMP[6].xxxx > 50: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 51: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 52: MUL TEMP[7].x, TEMP[0].zzzz, IN[1].xxxx > 53: FMA TEMP[7].x, TEMP[0].yyyy, IN[1].wwww, -TEMP[7].xxxx > 54: FMA TEMP[7].x, TEMP[0].xxxx, IN[1].zzzz, TEMP[7].xxxx > 55: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 56: DP2 TEMP[9].x, TEMP[0].yzzz, IN[1].xwww > 57: FMA TEMP[0].x, -TEMP[0].xxxx, IN[1].yyyy, TEMP[9].xxxx > 58: FMA TEMP[8].x, -TEMP[0].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 59: FMA TEMP[1].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 60: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 61: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 62: FMA TEMP[1].x, TEMP[7].xxxx, IN[1].wwww, TEMP[1].xxxx > 63: FMA TEMP[1].x, 
TEMP[0].xxxx, -IN[1].xxxx, TEMP[1].xxxx > 64: MOV TEMP[8].y, TEMP[1].xxxx > 65: FMA TEMP[0].x, TEMP[0].xxxx, IN[1].wwww, TEMP[5].xxxx > 66: MOV TEMP[8].z, TEMP[0].xxxx > 67: MOV TEMP[0].xyz, TEMP[8].xyzx > 68: MOV TEMP[1].xyz, IN[2].xyzx > 69: MOV TEMP[5].xyz, IN[3].xyzx > 70: MOV OUT[5], TEMP[5] > 71: MOV OUT[4], TEMP[1] > 72: MOV OUT[3], TEMP[0] > 73: MOV OUT[2], TEMP[2] > 74: MOV OUT[1], TEMP[4] > 75: MOV OUT[0], TEMP[3] > 76: END >radeonsi: Compiling shader 164 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 288) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 292) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 296) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 300) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 316) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 332) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 348) > %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 352) > %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 356) > %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 360) > %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 368) > %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) > %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 376) > %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 384) > %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 388) > %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 392) > %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 432) > %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 436) > %46 = call float @llvm.SI.load.const(<16 x i8> %18, i32 440) > %47 = call float @llvm.SI.load.const(<16 x i8> %18, i32 448) > %48 = call float @llvm.SI.load.const(<16 x i8> %18, i32 452) > %49 = call float @llvm.SI.load.const(<16 x i8> %18, i32 480) > %50 = call float @llvm.SI.load.const(<16 x i8> %18, i32 484) > %51 = call float @llvm.SI.load.const(<16 x i8> %18, i32 488) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %13) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > 
%58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %14) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %15) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = fmul float %19, %55 > %78 = fmul float %20, %56 > %79 = fadd float %77, %78 > %80 = fmul float %21, %57 > %81 = fadd float %79, %80 > %82 = fadd float %81, %22 > %83 = fmul float %23, %55 > %84 = fmul float %24, %56 > %85 = fadd float %83, %84 > %86 = fmul float %25, %57 > %87 = fadd float %85, %86 > %88 = fadd float %87, %26 > %89 = fmul float %27, %55 > %90 = fmul float %28, %56 > %91 = fadd float %89, %90 > %92 = fmul float %29, %57 > %93 = fadd float %91, %92 > %94 = fadd float %93, %30 > %95 = fmul float %31, %55 > %96 = fmul float %32, %56 > %97 = fadd float %95, %96 > %98 = fmul float %33, %57 > %99 = fadd float %97, %98 > %100 = fadd float %99, %34 > %101 = fmul float %100, %47 > %102 = fmul float %100, %48 > %103 = fsub float -0.000000e+00, %48 > %104 = call float @llvm.fma.f32(float %82, float %47, float %101) > %105 = call float @llvm.fma.f32(float %88, float %103, float %102) > %106 = fmul float %38, %50 > %107 = fmul float %39, %50 > %108 = fmul float %40, %50 > %109 = call float @llvm.fma.f32(float %35, float %49, float %106) > %110 = call float @llvm.fma.f32(float %36, float %49, float %107) > %111 = call float @llvm.fma.f32(float %37, float %49, float %108) > %112 = call float @llvm.fma.f32(float %41, float %51, float %109) > %113 = call float @llvm.fma.f32(float %42, float %51, float %110) > %114 = call float @llvm.fma.f32(float %43, float %51, float %111) > %115 = fmul float %113, %62 > %116 = fsub float -0.000000e+00, %61 > %117 = fsub float -0.000000e+00, %115 > %118 = call float @llvm.fma.f32(float %112, float %116, float %117) > %119 = fsub float -0.000000e+00, %114 > %120 = call float @llvm.fma.f32(float %119, float %63, float %118) > %121 = fmul float %61, %120 > %122 = fsub float -0.000000e+00, %121 > %123 = fmul float %114, %62 > %124 = fmul float %112, %64 > %125 = fadd float %123, %124 > %126 = fsub float -0.000000e+00, %113 > %127 = call float @llvm.fma.f32(float %126, float %63, float %125) > %128 = call float @llvm.fma.f32(float %127, float %64, float %122) > %129 = fmul float %63, %127 > %130 = fmul float %62, %127 > %131 = fsub float -0.000000e+00, %130 > %132 = fmul float %114, %61 > %133 = fsub float -0.000000e+00, %132 > %134 = call float @llvm.fma.f32(float %113, float %64, float %133) > %135 = call float @llvm.fma.f32(float %112, float %63, float %134) > %136 = fsub float -0.000000e+00, %63 > 
%137 = call float @llvm.fma.f32(float %135, float %136, float %128) > %138 = fmul float %113, %61 > %139 = fmul float %114, %64 > %140 = fadd float %138, %139 > %141 = fsub float -0.000000e+00, %112 > %142 = call float @llvm.fma.f32(float %141, float %62, float %140) > %143 = fsub float -0.000000e+00, %142 > %144 = fsub float -0.000000e+00, %62 > %145 = call float @llvm.fma.f32(float %143, float %144, float %137) > %146 = fsub float -0.000000e+00, %62 > %147 = call float @llvm.fma.f32(float %120, float %146, float %129) > %148 = fsub float -0.000000e+00, %63 > %149 = call float @llvm.fma.f32(float %120, float %148, float %131) > %150 = fsub float -0.000000e+00, %135 > %151 = fsub float -0.000000e+00, %61 > %152 = call float @llvm.fma.f32(float %150, float %151, float %149) > %153 = call float @llvm.fma.f32(float %135, float %64, float %147) > %154 = fsub float -0.000000e+00, %61 > %155 = call float @llvm.fma.f32(float %142, float %154, float %153) > %156 = call float @llvm.fma.f32(float %142, float %64, float %152) > %157 = fmul float %145, %145 > %158 = fmul float %155, %155 > %159 = fadd float %158, %157 > %160 = fmul float %156, %156 > %161 = fadd float %159, %160 > %162 = call float @llvm.AMDGPU.rsq.clamped.f32(float %161) > %163 = fmul float %162, %145 > %164 = fmul float %162, %155 > %165 = fmul float %162, %156 > %166 = fsub float %55, %44 > %167 = fsub float %56, %45 > %168 = fsub float %57, %46 > %169 = fmul float %167, %62 > %170 = fsub float -0.000000e+00, %61 > %171 = fsub float -0.000000e+00, %169 > %172 = call float @llvm.fma.f32(float %166, float %170, float %171) > %173 = fsub float -0.000000e+00, %168 > %174 = call float @llvm.fma.f32(float %173, float %63, float %172) > %175 = fmul float %61, %174 > %176 = fsub float -0.000000e+00, %175 > %177 = fmul float %168, %62 > %178 = fmul float %166, %64 > %179 = fadd float %177, %178 > %180 = fsub float -0.000000e+00, %167 > %181 = call float @llvm.fma.f32(float %180, float %63, float %179) > %182 = call float @llvm.fma.f32(float %181, float %64, float %176) > %183 = fmul float %63, %181 > %184 = fmul float %62, %181 > %185 = fsub float -0.000000e+00, %184 > %186 = fmul float %168, %61 > %187 = fsub float -0.000000e+00, %186 > %188 = call float @llvm.fma.f32(float %167, float %64, float %187) > %189 = call float @llvm.fma.f32(float %166, float %63, float %188) > %190 = fsub float -0.000000e+00, %63 > %191 = call float @llvm.fma.f32(float %189, float %190, float %182) > %192 = fmul float %167, %61 > %193 = fmul float %168, %64 > %194 = fadd float %192, %193 > %195 = fsub float -0.000000e+00, %166 > %196 = call float @llvm.fma.f32(float %195, float %62, float %194) > %197 = fsub float -0.000000e+00, %196 > %198 = fsub float -0.000000e+00, %62 > %199 = call float @llvm.fma.f32(float %197, float %198, float %191) > %200 = fsub float -0.000000e+00, %62 > %201 = call float @llvm.fma.f32(float %174, float %200, float %183) > %202 = fsub float -0.000000e+00, %63 > %203 = call float @llvm.fma.f32(float %174, float %202, float %185) > %204 = fsub float -0.000000e+00, %189 > %205 = fsub float -0.000000e+00, %61 > %206 = call float @llvm.fma.f32(float %204, float %205, float %203) > %207 = call float @llvm.fma.f32(float %189, float %64, float %201) > %208 = fsub float -0.000000e+00, %61 > %209 = call float @llvm.fma.f32(float %196, float %208, float %207) > %210 = call float @llvm.fma.f32(float %196, float %64, float %206) > %211 = bitcast i32 %11 to float > %212 = insertvalue <{ float, float, float }> undef, float %211, 2 > call void 
@llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %104, float %105, float %94, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %163, float %164, float %165, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %199, float %209, float %210, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %68, float %69, float %70, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %74, float %75, float %76, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %88, float %94, float %100) > ret <{ float, float, float }> %212 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL CONST[1][0..29] >DCL TEMP[0..14], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 464, 176, 272} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {192, 416, 400, 0} >IMM[4] FLT32 { -1.0000, -1.1000, 0.0000, 0.0000} > 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz > 1: SQRT TEMP[1].x, TEMP[0].xxxx > 2: MOV TEMP[0].x, TEMP[1].xxxx > 3: FSEQ TEMP[2].xyz, TEMP[1].xxxx, IMM[0].xxxx > 4: SSG TEMP[3].xyz, IN[2].xyzz > 5: MUL TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz > 6: RCP TEMP[4].xyz, TEMP[1].xxxx > 7: MUL TEMP[4].xyz, IN[2].xyzz, TEMP[4].xyzz > 8: UCMP TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[4].xyzz > 9: DP3 TEMP[3].x, TEMP[2].xyzz, IN[1].xyzz > 10: FSNE TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx > 11: UIF TEMP[4].xxxx :0 > 12: RCP TEMP[4].x, TEMP[3].xxxx > 13: ELSE :0 > 14: MOV TEMP[4].x, IMM[0].yyyy > 15: ENDIF > 16: MUL TEMP[5].x, TEMP[4].xxxx, CONST[1][29].xxxx > 17: FSEQ TEMP[6].xyz, TEMP[2].xyzz, IMM[0].xxxx > 18: SSG TEMP[7].xyz, IN[3].xyzz > 19: MUL TEMP[7].xyz, IMM[0].yyyy, TEMP[7].xyzz > 20: RCP TEMP[8].x, TEMP[2].xxxx > 21: RCP TEMP[8].y, TEMP[2].yyyy > 22: RCP TEMP[8].z, TEMP[2].zzzz > 23: MUL TEMP[8].xyz, IN[3].xyzz, TEMP[8].xyzz > 24: UCMP TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[8].xyzz > 25: FSEQ TEMP[7].xyz, TEMP[2].xyzz, IMM[0].xxxx > 26: SSG TEMP[8].xyz, IN[4].xyzz > 27: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 28: RCP TEMP[9].x, TEMP[2].xxxx > 29: RCP TEMP[9].y, TEMP[2].yyyy > 30: RCP TEMP[9].z, TEMP[2].zzzz > 31: MUL TEMP[9].xyz, IN[4].xyzz, TEMP[9].xyzz > 32: UCMP TEMP[7].xyz, TEMP[7].xyzz, 
TEMP[8].xyzz, TEMP[9].xyzz > 33: MIN TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz > 34: MAX TEMP[8].x, TEMP[6].zzzz, TEMP[6].yyyy > 35: MAX TEMP[8].x, TEMP[8].xxxx, TEMP[6].xxxx > 36: ADD TEMP[8].x, TEMP[1].xxxx, TEMP[8].xxxx > 37: FSEQ TEMP[9].xy, IN[0].wwww, IMM[0].xxxx > 38: SSG TEMP[10].xy, IN[0].xyyy > 39: MUL TEMP[10].xy, IMM[0].yyyy, TEMP[10].xyyy > 40: RCP TEMP[11].xy, IN[0].wwww > 41: MUL TEMP[11].xy, IN[0].xyyy, TEMP[11].xyyy > 42: UCMP TEMP[6].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[11].xyyy > 43: MOV TEMP[9].xy, TEMP[6].xyyy > 44: TEX TEMP[9].x, TEMP[9], SAMP[0], 2D > 45: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[9].xxxx > 46: MIN TEMP[9].x, TEMP[1].xxxx, TEMP[9].xxxx > 47: MAX TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx > 48: ADD TEMP[8].x, -TEMP[5].xxxx, TEMP[9].xxxx > 49: MAX TEMP[8].x, TEMP[8].xxxx, IMM[0].xxxx > 50: ADD TEMP[7].xyz, IN[3].xyzz, -IN[4].xyzz > 51: DP3 TEMP[10].x, TEMP[7].xyzz, TEMP[7].xyzz > 52: SQRT TEMP[10].x, TEMP[10].xxxx > 53: FSNE TEMP[11].x, TEMP[10].xxxx, IMM[0].xxxx > 54: UIF TEMP[11].xxxx :0 > 55: RCP TEMP[11].x, TEMP[10].xxxx > 56: MUL TEMP[11].x, TEMP[8].xxxx, TEMP[11].xxxx > 57: ELSE :0 > 58: SSG TEMP[8].x, TEMP[8].xxxx > 59: MUL TEMP[11].x, IMM[0].yyyy, TEMP[8].xxxx > 60: ENDIF > 61: ADD TEMP[8].x, -TEMP[11].xxxx, IMM[0].zzzz > 62: ABS TEMP[8].x, TEMP[8].xxxx > 63: LG2 TEMP[8].x, TEMP[8].xxxx > 64: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][11].zzzz > 65: EX2 TEMP[8].x, TEMP[8].xxxx > 66: ADD TEMP[8].x, -TEMP[8].xxxx, IMM[0].zzzz > 67: MOV TEMP[3].z, TEMP[8].xxxx > 68: ADD TEMP[11].x, TEMP[1].xxxx, -TEMP[5].xxxx > 69: ADD TEMP[9].x, TEMP[1].xxxx, -TEMP[9].xxxx > 70: FMA TEMP[9].xyz, -TEMP[2].xyzz, TEMP[9].xxxx, -IN[3].xyzz > 71: ADD TEMP[12].xyz, -IN[3].xyzz, IN[4].xyzz > 72: FSEQ TEMP[13].xyz, TEMP[12].xyzz, IMM[0].xxxx > 73: RCP TEMP[14].x, TEMP[12].xxxx > 74: RCP TEMP[14].y, TEMP[12].yyyy > 75: RCP TEMP[14].z, TEMP[12].zzzz > 76: UCMP TEMP[13].xyz, TEMP[13].xyzz, IMM[0].yyyy, TEMP[14].xyzz > 77: FMA TEMP[2].xyz, -TEMP[2].xyzz, TEMP[11].xxxx, -IN[3].xyzz > 78: MUL TEMP[7].xyz, TEMP[2].xyzz, TEMP[13].xyzz > 79: MUL TEMP[12].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 80: USNE TEMP[2].x, CONST[1][17].xxxx, IMM[1].xxxx > 81: UIF TEMP[2].xxxx :0 > 82: MOV TEMP[2].xy, TEMP[6].xyyy > 83: MOV TEMP[2].w, IMM[0].xxxx > 84: TXL TEMP[2].x, TEMP[2], SAMP[1], 2D > 85: FSLT TEMP[9].x, TEMP[2].xxxx, IMM[0].zzzz > 86: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].xxxx > 87: INEG TEMP[9].x, TEMP[9].xxxx > 88: USNE TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx > 89: UIF TEMP[9].xxxx :0 > 90: MOV TEMP[9].xy, TEMP[6].xyyy > 91: MOV TEMP[9].w, IMM[0].xxxx > 92: TXL TEMP[9].x, TEMP[9], SAMP[2], 2D > 93: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[9].xxxx > 94: MIN TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx > 95: ADD TEMP[0].x, -TEMP[5].xxxx, TEMP[1].xxxx > 96: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 97: FSNE TEMP[4].x, TEMP[10].xxxx, IMM[0].xxxx > 98: UIF TEMP[4].xxxx :0 > 99: RCP TEMP[4].x, TEMP[10].xxxx >100: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx >101: ELSE :0 >102: SSG TEMP[1].x, TEMP[1].xxxx >103: MUL TEMP[4].x, IMM[0].yyyy, TEMP[1].xxxx >104: ENDIF >105: ADD TEMP[0].x, -TEMP[4].xxxx, IMM[0].zzzz >106: ABS TEMP[1].x, TEMP[0].xxxx >107: LG2 TEMP[1].x, TEMP[1].xxxx >108: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][11].zzzz >109: EX2 TEMP[1].x, TEMP[0].xxxx >110: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz >111: SQRT TEMP[1].x, TEMP[2].xxxx >112: ADD TEMP[2].x, -TEMP[0].xxxx, TEMP[8].xxxx >113: FMA TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[0].xxxx >114: MOV TEMP[3].z, TEMP[1].xxxx >115: ENDIF >116: ENDIF >117: SQRT TEMP[1].x, 
CONST[1][12].xxxx >118: SQRT TEMP[1].y, CONST[1][12].yyyy >119: FSEQ TEMP[2].xy, TEMP[1].xyyy, IMM[0].xxxx >120: RCP TEMP[4].x, TEMP[1].xxxx >121: RCP TEMP[4].y, TEMP[1].yyyy >122: UCMP TEMP[2].xy, TEMP[2].xyyy, IMM[0].yyyy, TEMP[4].xyyy >123: MOV TEMP[6].xy, TEMP[2].xyxx >124: ADD TEMP[4], TEMP[1].xyxy, IMM[4].xxyy >125: MOV TEMP[7].w, TEMP[12].xxxx >126: MUL TEMP[5].xy, TEMP[7].xwww, TEMP[4].xyyy >127: MAX TEMP[5].xy, TEMP[5].xyyy, IMM[0].xxxx >128: MIN TEMP[5].xy, TEMP[4].zwww, TEMP[5].xyyy >129: FRC TEMP[8].xy, TEMP[5].xyyy >130: ADD TEMP[5].xy, TEMP[5].xyyy, -TEMP[8].xyyy >131: MOV TEMP[12].w, TEMP[7].zzzz >132: MUL TEMP[9].xy, TEMP[12].wzzz, TEMP[4].xyyy >133: FSEQ TEMP[10].xy, CONST[1][12].xyyy, IMM[0].xxxx >134: RCP TEMP[11].x, CONST[1][12].xxxx >135: RCP TEMP[11].y, CONST[1][12].yyyy >136: MUL TEMP[11].xy, IMM[0].wwww, TEMP[11].xyyy >137: UCMP TEMP[10].xy, TEMP[10].xyyy, IMM[0].yyyy, TEMP[11].xyyy >138: FSEQ TEMP[11].xy, CONST[1][12].xyyy, IMM[0].xxxx >139: SSG TEMP[13].xy, TEMP[9].xyyy >140: MUL TEMP[13].xy, IMM[0].yyyy, TEMP[13].xyyy >141: RCP TEMP[14].x, CONST[1][12].xxxx >142: RCP TEMP[14].y, CONST[1][12].yyyy >143: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[14].xyyy >144: UCMP TEMP[9].xy, TEMP[11].xyyy, TEMP[13].xyyy, TEMP[9].xyyy >145: ADD TEMP[9].xy, TEMP[9].xyyy, TEMP[10].xyyy >146: FSEQ TEMP[10].xy, TEMP[1].xyyy, IMM[0].xxxx >147: SSG TEMP[11].xy, TEMP[5].xyyy >148: MUL TEMP[11].xy, IMM[0].yyyy, TEMP[11].xyyy >149: RCP TEMP[13].x, TEMP[1].xxxx >150: RCP TEMP[13].y, TEMP[1].yyyy >151: MUL TEMP[1].xy, TEMP[5].xyyy, TEMP[13].xyyy >152: UCMP TEMP[1].xy, TEMP[10].xyyy, TEMP[11].xyyy, TEMP[1].xyyy >153: ADD TEMP[0].xy, TEMP[9].xyyy, TEMP[1].xyyy >154: MOV TEMP[6].z, IMM[0].xxxx >155: ADD TEMP[4], TEMP[6].xzzy, TEMP[0].xyxy >156: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[0].xyyy >157: MOV TEMP[2].xy, TEMP[0].xyyy >158: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D >159: MOV TEMP[5].xy, TEMP[4].xyyy >160: TEX TEMP[5].xyz, TEMP[5], SAMP[3], 2D >161: MOV TEMP[4].xy, TEMP[4].zwww >162: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D >163: ADD TEMP[5].xyz, -TEMP[2].xyzz, TEMP[5].xyzz >164: FMA TEMP[2].xyz, TEMP[8].xxxx, TEMP[5].xyzz, TEMP[2].xyzz >165: MOV TEMP[1].xy, TEMP[1].xyyy >166: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D >167: ADD TEMP[0].xyz, -TEMP[4].xyzz, TEMP[1].xyzz >168: FMA TEMP[1].xyz, TEMP[8].xxxx, TEMP[0].xyzz, TEMP[4].xyzz >169: ADD TEMP[0].xyz, -TEMP[2].xyzz, TEMP[1].xyzz >170: FMA TEMP[0].xyz, TEMP[8].yyyy, TEMP[0].xyzz, TEMP[2].xyzz >171: MOV TEMP[12].x, TEMP[7].yyyy >172: MOV TEMP[1].xy, TEMP[12].xyyy >173: TEX TEMP[1].y, TEMP[1], SAMP[4], 2D >174: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[3].zzzz >175: MOV TEMP[0].w, TEMP[1].xxxx >176: MUL TEMP[0], TEMP[0], CONST[1][26] >177: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz >178: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][25].xyzz >179: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][11].wwww >180: MOV TEMP[1].w, TEMP[1].xxxx >181: MOV TEMP[1].xyz, TEMP[0].xyzx >182: MOV OUT[0], TEMP[1] >183: END >radeonsi: Compiling shader 165 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x 
i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 188) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 428) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 7 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 11 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 > %67 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %68 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %67, i64 0, i64 15 > %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 > %70 = extractelement <8 x i32> %66, i32 7 > %71 = extractelement <4 x i32> %69, i32 0 > %72 = and i32 %71, %70 > %73 = insertelement <4 x i32> %69, i32 %72, i32 0 > %74 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 > %76 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to 
[0 x <4 x i32>] addrspace(2)* > %77 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %76, i64 0, i64 19 > %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 > %79 = extractelement <8 x i32> %75, i32 7 > %80 = extractelement <4 x i32> %78, i32 0 > %81 = and i32 %80, %79 > %82 = insertelement <4 x i32> %78, i32 %81, i32 0 > %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %98 = fmul float %89, %89 > %99 = fmul float %90, %90 > %100 = fadd float %99, %98 > %101 = fmul float %91, %91 > %102 = fadd float %100, %101 > %103 = call float @llvm.sqrt.f32(float %102) > %104 = fcmp oeq float %103, 0.000000e+00 > %105 = fcmp oeq float %103, 0.000000e+00 > %106 = fcmp oeq float %103, 0.000000e+00 > %107 = fcmp ogt float %89, 0.000000e+00 > %108 = select i1 %107, float 1.000000e+00, float %89 > %109 = fcmp oge float %108, 0.000000e+00 > %110 = fcmp ogt float %90, 0.000000e+00 > %111 = select i1 %110, float 1.000000e+00, float %90 > %112 = fcmp oge float %111, 0.000000e+00 > %113 = fcmp ogt float %91, 0.000000e+00 > %114 = select i1 %113, float 1.000000e+00, float %91 > %115 = fcmp oge float %114, 0.000000e+00 > %.op = fmul float %108, 0x4600000000000000 > %116 = select i1 %109, float %.op, float 0xC600000000000000 > %.op72 = fmul float %111, 0x4600000000000000 > %117 = select i1 %112, float %.op72, float 0xC600000000000000 > %.op73 = fmul float %114, 0x4600000000000000 > %118 = select i1 %115, float %.op73, float 0xC600000000000000 > %119 = fdiv float 1.000000e+00, %103 > %120 = fmul float %89, %119 > %121 = fmul float %90, %119 > %122 = fmul float %91, %119 > %123 = select i1 %104, float %116, float %120 > %124 = select i1 %105, float %117, float %121 > %125 = select i1 %106, float %118, float %122 > %126 = fmul float %123, %86 > %127 = fmul float %124, %87 > %128 = fadd float %127, %126 > %129 = fmul float %125, %88 > %130 = fadd float %128, %129 > %131 = fcmp une float %130, 0.000000e+00 > %132 = fdiv float 1.000000e+00, %130 > %temp16.0 = select i1 %131, float %132, float 0x4600000000000000 > %133 = fmul float %temp16.0, %37 > %134 = fcmp oeq float %123, 0.000000e+00 > %135 = fcmp oeq float %124, 0.000000e+00 > %136 = fcmp oeq float %125, 0.000000e+00 > %137 = fcmp ogt float %92, 0.000000e+00 > %138 = select i1 %137, float 1.000000e+00, float %92 > %139 = fcmp oge float %138, 0.000000e+00 > %140 = fcmp ogt float %93, 0.000000e+00 > %141 = select i1 %140, float 1.000000e+00, float %93 > %142 = fcmp oge float %141, 0.000000e+00 > 
%143 = fcmp ogt float %94, 0.000000e+00 > %144 = select i1 %143, float 1.000000e+00, float %94 > %145 = fcmp oge float %144, 0.000000e+00 > %.op74 = fmul float %138, 0x4600000000000000 > %146 = select i1 %139, float %.op74, float 0xC600000000000000 > %.op75 = fmul float %141, 0x4600000000000000 > %147 = select i1 %142, float %.op75, float 0xC600000000000000 > %.op76 = fmul float %144, 0x4600000000000000 > %148 = select i1 %145, float %.op76, float 0xC600000000000000 > %149 = fdiv float 1.000000e+00, %123 > %150 = fdiv float 1.000000e+00, %124 > %151 = fdiv float 1.000000e+00, %125 > %152 = fmul float %92, %149 > %153 = fmul float %93, %150 > %154 = fmul float %94, %151 > %155 = select i1 %134, float %146, float %152 > %156 = select i1 %135, float %147, float %153 > %157 = select i1 %136, float %148, float %154 > %158 = fcmp oeq float %123, 0.000000e+00 > %159 = fcmp oeq float %124, 0.000000e+00 > %160 = fcmp oeq float %125, 0.000000e+00 > %161 = fcmp ogt float %95, 0.000000e+00 > %162 = select i1 %161, float 1.000000e+00, float %95 > %163 = fcmp oge float %162, 0.000000e+00 > %164 = fcmp ogt float %96, 0.000000e+00 > %165 = select i1 %164, float 1.000000e+00, float %96 > %166 = fcmp oge float %165, 0.000000e+00 > %167 = fcmp ogt float %97, 0.000000e+00 > %168 = select i1 %167, float 1.000000e+00, float %97 > %169 = fcmp oge float %168, 0.000000e+00 > %.op77 = fmul float %162, 0x4600000000000000 > %170 = select i1 %163, float %.op77, float 0xC600000000000000 > %.op78 = fmul float %165, 0x4600000000000000 > %171 = select i1 %166, float %.op78, float 0xC600000000000000 > %.op79 = fmul float %168, 0x4600000000000000 > %172 = select i1 %169, float %.op79, float 0xC600000000000000 > %173 = fdiv float 1.000000e+00, %123 > %174 = fdiv float 1.000000e+00, %124 > %175 = fdiv float 1.000000e+00, %125 > %176 = fmul float %95, %173 > %177 = fmul float %96, %174 > %178 = fmul float %97, %175 > %179 = select i1 %158, float %170, float %176 > %180 = select i1 %159, float %171, float %177 > %181 = select i1 %160, float %172, float %178 > %182 = call float @llvm.minnum.f32(float %155, float %179) > %183 = call float @llvm.minnum.f32(float %156, float %180) > %184 = call float @llvm.minnum.f32(float %157, float %181) > %185 = call float @llvm.maxnum.f32(float %184, float %183) > %186 = call float @llvm.maxnum.f32(float %185, float %182) > %187 = fadd float %103, %186 > %188 = fcmp oeq float %85, 0.000000e+00 > %189 = fcmp oeq float %85, 0.000000e+00 > %190 = fcmp ogt float %83, 0.000000e+00 > %191 = select i1 %190, float 1.000000e+00, float %83 > %192 = fcmp oge float %191, 0.000000e+00 > %193 = fcmp ogt float %84, 0.000000e+00 > %194 = select i1 %193, float 1.000000e+00, float %84 > %195 = fcmp oge float %194, 0.000000e+00 > %.op80 = fmul float %191, 0x4600000000000000 > %196 = select i1 %192, float %.op80, float 0xC600000000000000 > %.op81 = fmul float %194, 0x4600000000000000 > %197 = select i1 %195, float %.op81, float 0xC600000000000000 > %198 = fdiv float 1.000000e+00, %85 > %199 = fmul float %83, %198 > %200 = fmul float %84, %198 > %201 = select i1 %188, float %196, float %199 > %202 = select i1 %189, float %197, float %200 > %203 = bitcast float %201 to i32 > %204 = bitcast float %202 to i32 > %205 = insertelement <2 x i32> undef, i32 %203, i32 0 > %206 = insertelement <2 x i32> %205, i32 %204, i32 1 > %207 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %206, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %208 = extractelement <4 x float> %207, i32 
0 > %209 = fmul float %temp16.0, %208 > %210 = call float @llvm.minnum.f32(float %103, float %209) > %211 = call float @llvm.maxnum.f32(float %133, float %187) > %212 = fsub float %210, %211 > %213 = call float @llvm.maxnum.f32(float %212, float 0.000000e+00) > %214 = fsub float %92, %95 > %215 = fsub float %93, %96 > %216 = fsub float %94, %97 > %217 = fmul float %214, %214 > %218 = fmul float %215, %215 > %219 = fadd float %218, %217 > %220 = fmul float %216, %216 > %221 = fadd float %219, %220 > %222 = call float @llvm.sqrt.f32(float %221) > %223 = fcmp une float %222, 0.000000e+00 > br i1 %223, label %IF61, label %ELSE62 > >IF61: ; preds = %main_body > %224 = fdiv float 1.000000e+00, %222 > %225 = fmul float %213, %224 > br label %ENDIF60 > >ELSE62: ; preds = %main_body > %226 = fcmp ogt float %213, 0.000000e+00 > %227 = select i1 %226, float 1.000000e+00, float %213 > %228 = fcmp oge float %227, 0.000000e+00 > %.op82 = fmul float %227, 0x4600000000000000 > %229 = select i1 %228, float %.op82, float 0xC600000000000000 > br label %ENDIF60 > >ENDIF60: ; preds = %ELSE62, %IF61 > %temp44.0 = phi float [ %225, %IF61 ], [ %229, %ELSE62 ] > %230 = fsub float 1.000000e+00, %temp44.0 > %231 = call float @llvm.fabs.f32(float %230) > %232 = call float @llvm.log2.f32(float %231) > %233 = fmul float %232, %25 > %234 = call float @llvm.exp2.f32(float %233) > %235 = fsub float 1.000000e+00, %234 > %236 = fsub float %103, %211 > %237 = fsub float %103, %210 > %238 = fsub float -0.000000e+00, %123 > %239 = fsub float -0.000000e+00, %92 > %240 = call float @llvm.fma.f32(float %238, float %237, float %239) > %241 = fsub float -0.000000e+00, %124 > %242 = fsub float -0.000000e+00, %93 > %243 = call float @llvm.fma.f32(float %241, float %237, float %242) > %244 = fsub float -0.000000e+00, %125 > %245 = fsub float -0.000000e+00, %94 > %246 = call float @llvm.fma.f32(float %244, float %237, float %245) > %247 = fsub float %95, %92 > %248 = fsub float %96, %93 > %249 = fsub float %97, %94 > %250 = fcmp oeq float %247, 0.000000e+00 > %251 = fcmp oeq float %248, 0.000000e+00 > %252 = fcmp oeq float %249, 0.000000e+00 > %253 = fdiv float 1.000000e+00, %247 > %254 = fdiv float 1.000000e+00, %248 > %255 = fdiv float 1.000000e+00, %249 > %256 = select i1 %250, float 0x4600000000000000, float %253 > %257 = select i1 %251, float 0x4600000000000000, float %254 > %258 = select i1 %252, float 0x4600000000000000, float %255 > %259 = fsub float -0.000000e+00, %123 > %260 = fsub float -0.000000e+00, %92 > %261 = call float @llvm.fma.f32(float %259, float %236, float %260) > %262 = fsub float -0.000000e+00, %124 > %263 = fsub float -0.000000e+00, %93 > %264 = call float @llvm.fma.f32(float %262, float %236, float %263) > %265 = fsub float -0.000000e+00, %125 > %266 = fsub float -0.000000e+00, %94 > %267 = call float @llvm.fma.f32(float %265, float %236, float %266) > %268 = fmul float %261, %256 > %269 = fmul float %264, %257 > %270 = fmul float %267, %258 > %271 = fmul float %240, %256 > %272 = fmul float %243, %257 > %273 = fmul float %246, %258 > %274 = bitcast float %29 to i32 > %275 = icmp eq i32 %274, 0 > br i1 %275, label %ENDIF63, label %IF64 > >IF64: ; preds = %ENDIF60 > %276 = bitcast float %201 to i32 > %277 = bitcast float %202 to i32 > %278 = insertelement <4 x i32> undef, i32 %276, i32 0 > %279 = insertelement <4 x i32> %278, i32 %277, i32 1 > %280 = insertelement <4 x i32> %279, i32 0, i32 2 > %281 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %280, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %282 = extractelement <4 x float> %281, i32 0 > %283 = fcmp olt float %282, 1.000000e+00 > br i1 %283, label %IF67, label %ENDIF63 > >ENDIF63: ; preds = %ENDIF60, %ENDIF69, %IF64 > %temp14.0 = phi float [ %235, %ENDIF60 ], [ %463, %ENDIF69 ], [ %235, %IF64 ] > %284 = call float @llvm.sqrt.f32(float %27) > %285 = call float @llvm.sqrt.f32(float %28) > %286 = fcmp oeq float %284, 0.000000e+00 > %287 = fcmp oeq float %285, 0.000000e+00 > %288 = fdiv float 1.000000e+00, %284 > %289 = fdiv float 1.000000e+00, %285 > %290 = select i1 %286, float 0x4600000000000000, float %288 > %291 = select i1 %287, float 0x4600000000000000, float %289 > %292 = fadd float %284, -1.000000e+00 > %293 = fadd float %285, -1.000000e+00 > %294 = fadd float %284, 0xBFF19999A0000000 > %295 = fadd float %285, 0xBFF19999A0000000 > %296 = fmul float %268, %292 > %297 = fmul float %271, %293 > %298 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %300 = call float @llvm.minnum.f32(float %294, float %298) > %301 = call float @llvm.minnum.f32(float %295, float %299) > %302 = call float @llvm.floor.f32(float %300) > %303 = fsub float %300, %302 > %304 = call float @llvm.floor.f32(float %301) > %305 = fsub float %301, %304 > %306 = fsub float %300, %303 > %307 = fsub float %301, %305 > %308 = fmul float %270, %292 > %309 = fmul float %273, %293 > %310 = fcmp oeq float %27, 0.000000e+00 > %311 = fcmp oeq float %28, 0.000000e+00 > %312 = fdiv float 1.000000e+00, %27 > %313 = fdiv float 1.000000e+00, %28 > %314 = fmul float %312, 5.000000e-01 > %315 = fmul float %313, 5.000000e-01 > %316 = select i1 %310, float 0x4600000000000000, float %314 > %317 = select i1 %311, float 0x4600000000000000, float %315 > %318 = fcmp oeq float %27, 0.000000e+00 > %319 = fcmp oeq float %28, 0.000000e+00 > %320 = fcmp ogt float %308, 0.000000e+00 > %321 = select i1 %320, float 1.000000e+00, float %308 > %322 = fcmp oge float %321, 0.000000e+00 > %323 = fcmp ogt float %309, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %309 > %325 = fcmp oge float %324, 0.000000e+00 > %.op83 = fmul float %321, 0x4600000000000000 > %326 = select i1 %322, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %324, 0x4600000000000000 > %327 = select i1 %325, float %.op84, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %27 > %329 = fdiv float 1.000000e+00, %28 > %330 = fmul float %308, %328 > %331 = fmul float %309, %329 > %332 = select i1 %318, float %326, float %330 > %333 = select i1 %319, float %327, float %331 > %334 = fadd float %332, %316 > %335 = fadd float %333, %317 > %336 = fcmp oeq float %284, 0.000000e+00 > %337 = fcmp oeq float %285, 0.000000e+00 > %338 = fcmp ogt float %306, 0.000000e+00 > %339 = select i1 %338, float 1.000000e+00, float %306 > %340 = fcmp oge float %339, 0.000000e+00 > %341 = fcmp ogt float %307, 0.000000e+00 > %342 = select i1 %341, float 1.000000e+00, float %307 > %343 = fcmp oge float %342, 0.000000e+00 > %.op85 = fmul float %339, 0x4600000000000000 > %344 = select i1 %340, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %342, 0x4600000000000000 > %345 = select i1 %343, float %.op86, float 0xC600000000000000 > %346 = fdiv float 1.000000e+00, %284 > %347 = fdiv float 1.000000e+00, %285 > %348 = fmul float %306, %346 > %349 = fmul float %307, %347 > %350 = select i1 %336, float %344, float %348 > %351 = select i1 %337, float %345, float %349 > %352 = fadd float 
%334, %350 > %353 = fadd float %335, %351 > %354 = fadd float %290, %352 > %355 = fadd float %353, 0.000000e+00 > %356 = fadd float %352, 0.000000e+00 > %357 = fadd float %291, %353 > %358 = fadd float %290, %352 > %359 = fadd float %291, %353 > %360 = bitcast float %352 to i32 > %361 = bitcast float %353 to i32 > %362 = insertelement <2 x i32> undef, i32 %360, i32 0 > %363 = insertelement <2 x i32> %362, i32 %361, i32 1 > %364 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %363, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %365 = extractelement <4 x float> %364, i32 0 > %366 = extractelement <4 x float> %364, i32 1 > %367 = extractelement <4 x float> %364, i32 2 > %368 = bitcast float %354 to i32 > %369 = bitcast float %355 to i32 > %370 = insertelement <2 x i32> undef, i32 %368, i32 0 > %371 = insertelement <2 x i32> %370, i32 %369, i32 1 > %372 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %371, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %373 = extractelement <4 x float> %372, i32 0 > %374 = extractelement <4 x float> %372, i32 1 > %375 = extractelement <4 x float> %372, i32 2 > %376 = bitcast float %356 to i32 > %377 = bitcast float %357 to i32 > %378 = insertelement <2 x i32> undef, i32 %376, i32 0 > %379 = insertelement <2 x i32> %378, i32 %377, i32 1 > %380 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %379, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %381 = extractelement <4 x float> %380, i32 0 > %382 = extractelement <4 x float> %380, i32 1 > %383 = extractelement <4 x float> %380, i32 2 > %384 = fsub float %373, %365 > %385 = fsub float %374, %366 > %386 = fsub float %375, %367 > %387 = call float @llvm.fma.f32(float %303, float %384, float %365) > %388 = call float @llvm.fma.f32(float %303, float %385, float %366) > %389 = call float @llvm.fma.f32(float %303, float %386, float %367) > %390 = bitcast float %358 to i32 > %391 = bitcast float %359 to i32 > %392 = insertelement <2 x i32> undef, i32 %390, i32 0 > %393 = insertelement <2 x i32> %392, i32 %391, i32 1 > %394 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %393, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %395 = extractelement <4 x float> %394, i32 0 > %396 = extractelement <4 x float> %394, i32 1 > %397 = extractelement <4 x float> %394, i32 2 > %398 = fsub float %395, %381 > %399 = fsub float %396, %382 > %400 = fsub float %397, %383 > %401 = call float @llvm.fma.f32(float %303, float %398, float %381) > %402 = call float @llvm.fma.f32(float %303, float %399, float %382) > %403 = call float @llvm.fma.f32(float %303, float %400, float %383) > %404 = fsub float %401, %387 > %405 = fsub float %402, %388 > %406 = fsub float %403, %389 > %407 = call float @llvm.fma.f32(float %305, float %404, float %387) > %408 = call float @llvm.fma.f32(float %305, float %405, float %388) > %409 = call float @llvm.fma.f32(float %305, float %406, float %389) > %410 = bitcast float %269 to i32 > %411 = bitcast float %272 to i32 > %412 = insertelement <2 x i32> undef, i32 %410, i32 0 > %413 = insertelement <2 x i32> %412, i32 %411, i32 1 > %414 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %413, <8 x i32> %75, <4 x i32> %82, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %415 = extractelement <4 x float> %414, i32 1 > %416 = fmul float %415, %temp14.0 > %417 = fmul float %407, %33 > %418 = fmul float 
%408, %34 > %419 = fmul float %409, %35 > %420 = fmul float %416, %36 > %421 = fmul float %420, %417 > %422 = fmul float %420, %418 > %423 = fmul float %420, %419 > %424 = fmul float %421, %30 > %425 = fmul float %422, %31 > %426 = fadd float %425, %424 > %427 = fmul float %423, %32 > %428 = fadd float %426, %427 > %429 = fmul float %428, %26 > %430 = bitcast float %5 to i32 > %431 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %430, 10 > %432 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %431, float %421, 11 > %433 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %432, float %422, 12 > %434 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %433, float %423, 13 > %435 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %434, float %429, 14 > %436 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %435, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %436 > >IF67: ; preds = %IF64 > %437 = bitcast float %201 to i32 > %438 = bitcast float %202 to i32 > %439 = insertelement <4 x i32> undef, i32 %437, i32 0 > %440 = insertelement <4 x i32> %439, i32 %438, i32 1 > %441 = insertelement <4 x i32> %440, i32 0, i32 2 > %442 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %441, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %443 = extractelement <4 x float> %442, i32 0 > %444 = fmul float %temp16.0, %443 > %445 = call float @llvm.minnum.f32(float %444, float %103) > %446 = fsub float %445, %211 > %447 = call float @llvm.maxnum.f32(float %446, float 0.000000e+00) > %448 = fcmp une float %222, 0.000000e+00 > br i1 %448, label %IF70, label %ELSE71 > >IF70: ; preds = %IF67 > %449 = fdiv float 1.000000e+00, %222 > %450 = fmul float %447, %449 > br label %ENDIF69 > >ELSE71: ; preds = %IF67 > %451 = fcmp ogt float %447, 0.000000e+00 > %452 = select i1 %451, float 1.000000e+00, float %447 > %453 = fcmp oge float %452, 0.000000e+00 > %.op87 = fmul float %452, 0x4600000000000000 > %454 = select i1 %453, float %.op87, float 0xC600000000000000 > br label %ENDIF69 > >ENDIF69: ; preds = %ELSE71, %IF70 > %temp16.1 = phi float [ %450, %IF70 ], [ %454, %ELSE71 ] > %455 = fsub float 1.000000e+00, %temp16.1 > %456 = call float @llvm.fabs.f32(float %455) > %457 = call float @llvm.log2.f32(float %456) > %458 = fmul float %457, %25 > %459 = call float @llvm.exp2.f32(float %458) > %460 = fsub float 1.000000e+00, %459 > %461 = call float @llvm.sqrt.f32(float %282) > %462 = fsub float %235, %460 > %463 = call float @llvm.fma.f32(float %461, float %462, float %460) > br label %ENDIF63 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind 
readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.floor.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..5] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] UINT32 {64, 80, 0, 0} > 0: DP4 TEMP[0].x, IN[2], CONST[1][2] > 1: DP4 TEMP[1].x, IN[2], CONST[1][3] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[2], CONST[1][4] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[2], CONST[1][5] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: MOV OUT[4], CONST[1][1] > 8: MOV OUT[3], CONST[1][0] > 9: MOV OUT[2], IN[1] > 10: MOV OUT[1], IN[0] > 11: MOV OUT[0], TEMP[0] > 12: END >radeonsi: Compiling shader 166 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 68) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 72) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 76) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 80) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 84) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 88) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 92) > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %13) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = extractelement <4 x float> %44, i32 2 > %48 = extractelement <4 x float> %44, i32 3 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %14) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %15) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = extractelement <4 x float> %58, i32 3 > %63 = fmul float %59, %26 > %64 = fmul float %60, %27 > %65 = fadd float %63, %64 > %66 = fmul float %61, %28 > %67 = fadd float %65, %66 > %68 = fmul float %62, %29 > %69 = fadd float %67, %68 > %70 = fmul float %59, %30 > %71 = fmul float %60, %31 > %72 = fadd float %70, %71 > %73 = fmul float %61, %32 > %74 = fadd float %72, %73 > %75 = fmul float %62, %33 > %76 = fadd float %74, %75 > %77 = fmul float %59, %34 > %78 = fmul float %60, %35 > %79 = fadd float %77, %78 > %80 = fmul float %61, %36 > %81 = fadd float %79, %80 > %82 = fmul float %62, %37 > %83 = fadd float %81, %82 > %84 = fmul float %59, %38 > %85 = fmul float %60, %39 > %86 = fadd float %84, %85 > %87 = fmul float %61, %40 > %88 = fadd float %86, %87 > %89 = fmul float %62, %41 > %90 = fadd float %88, %89 > %91 = bitcast i32 %11 to float > %92 = insertvalue <{ float, float, float }> undef, float %91, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %52, float %53, float %54, float %55) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %18, float %19, float %20, float %21) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %22, float %23, float %24, float %25) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %76, float %83, float %90) > ret <{ float, float, float }> %92 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void 
@llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL TEMP[0..1], LOCAL > 0: FMA TEMP[0], IN[0], IN[3], IN[2] > 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww > 2: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 3: MOV TEMP[0].w, TEMP[1].xxxx > 4: MOV OUT[0], TEMP[0] > 5: END >radeonsi: Compiling shader 167 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %36 = call float @llvm.fma.f32(float %23, float %32, float %28) > %37 = call float @llvm.fma.f32(float %24, float %33, float %29) > %38 = call float @llvm.fma.f32(float %25, float %34, float %30) > %39 = call float @llvm.fma.f32(float %26, float %35, float %31) > %40 = fmul float %39, %27 > %41 = fmul float %40, %36 > %42 = fmul float %40, %37 > %43 = fmul float %40, %38 > %44 = bitcast float %5 to i32 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %44, 10 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %41, 11 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %42, 12 > %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %43, 13 > %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %40, 14 > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL CONST[1][0..40] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 512} >IMM[3] UINT32 {288, 480, 496, 528} >IMM[4] UINT32 {624, 640, 544, 560} >IMM[5] UINT32 {576, 432, 0, 0} >IMM[6] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: 
MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, 
TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz 
>197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL 
TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][32], TEMP[18] >356: ADD TEMP[3].x, TEMP[3].xxxx, CONST[1][18].yyyy >357: MOV TEMP[1].z, TEMP[3].xxxx >358: DP4 TEMP[1].x, 
CONST[1][30], TEMP[18] >359: DP4 TEMP[3].x, CONST[1][31], TEMP[18] >360: MOV TEMP[1].y, TEMP[3].xxxx >361: DP4 TEMP[3].x, CONST[1][33], TEMP[18] >362: MOV TEMP[1].w, TEMP[3].xxxx >363: ADD TEMP[2].xyz, -TEMP[18].xyzz, CONST[1][39].xyzz >364: MOV TEMP[3], TEMP[1] >365: MOV TEMP[5].xy, IN[2].xyxx >366: DP3 TEMP[6].x, CONST[1][34].xyzz, TEMP[2].xyzz >367: DP3 TEMP[8].x, CONST[1][35].xyzz, TEMP[2].xyzz >368: MOV TEMP[6].y, TEMP[8].xxxx >369: DP3 TEMP[8].x, CONST[1][36].xyzz, TEMP[2].xyzz >370: MOV TEMP[6].z, TEMP[8].xxxx >371: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >372: MOV TEMP[1].y, TEMP[8].xxxx >373: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >374: MOV TEMP[1].z, TEMP[8].xxxx >375: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >376: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >377: RSQ TEMP[8].x, TEMP[8].xxxx >378: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >379: DP3 TEMP[8].x, CONST[1][34].xyzz, TEMP[1].xyzz >380: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >381: MOV TEMP[2].y, TEMP[9].xxxx >382: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >383: MOV TEMP[4].y, TEMP[9].xxxx >384: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >385: MOV TEMP[2].z, TEMP[9].xxxx >386: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >387: MOV TEMP[4].z, TEMP[7].xxxx >388: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >389: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >390: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >391: RSQ TEMP[7].x, TEMP[0].xxxx >392: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >393: DP3 TEMP[7].x, CONST[1][34].xyzz, TEMP[0].xyzz >394: MOV TEMP[8].y, TEMP[7].xxxx >395: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >396: RSQ TEMP[7].x, TEMP[7].xxxx >397: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >398: DP3 TEMP[4].x, CONST[1][34].xyzz, TEMP[2].xyzz >399: MOV TEMP[8].z, TEMP[4].xxxx >400: DP3 TEMP[4].x, CONST[1][35].xyzz, TEMP[1].xyzz >401: DP3 TEMP[1].x, CONST[1][36].xyzz, TEMP[1].xyzz >402: DP3 TEMP[7].x, CONST[1][35].xyzz, TEMP[0].xyzz >403: MOV TEMP[4].y, TEMP[7].xxxx >404: DP3 TEMP[7].x, CONST[1][36].xyzz, TEMP[0].xyzz >405: MOV TEMP[1].y, TEMP[7].xxxx >406: DP3 TEMP[7].x, CONST[1][35].xyzz, TEMP[2].xyzz >407: MOV TEMP[4].z, TEMP[7].xxxx >408: DP3 TEMP[2].x, CONST[1][36].xyzz, TEMP[2].xyzz >409: MOV TEMP[1].z, TEMP[2].xxxx >410: FSEQ TEMP[2].xy, CONST[1][27].zwww, IMM[6].xxxx >411: RCP TEMP[7].x, CONST[1][27].zzzz >412: RCP TEMP[7].y, CONST[1][27].wwww >413: UCMP TEMP[0].xy, TEMP[2].xyyy, IMM[6].yyyy, TEMP[7].xyyy >414: FMA TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy, CONST[1][27].xyyy >415: MOV TEMP[0].xy, -TEMP[0].xyxx >416: MOV TEMP[0].z, IMM[6].xxxx >417: MOV TEMP[0].w, IN[5].wwww >418: MOV OUT[6], TEMP[0] >419: MOV OUT[5], TEMP[1] >420: MOV OUT[4], TEMP[4] >421: MOV OUT[3], TEMP[8] >422: MOV OUT[2], TEMP[6] >423: MOV OUT[1], TEMP[5] >424: MOV OUT[0], TEMP[3] >425: END >radeonsi: Compiling shader 168 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292) > %24 = call float 
@llvm.SI.load.const(<16 x i8> %22, i32 432) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 436) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 440) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 444) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 480) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 484) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 488) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 492) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 496) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 500) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 504) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 508) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 512) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 516) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 520) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 524) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 528) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 532) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 536) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 540) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 544) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 548) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 552) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 560) > %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 564) > %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 568) > %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 576) > %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 580) > %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 584) > %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 624) > %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 628) > %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 632) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %13) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %14) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 > %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %15) > %73 = extractelement <4 x float> %72, i32 0 > %74 = extractelement <4 x float> %72, i32 1 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %16) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x 
float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %17) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %18) > %90 = extractelement <4 x float> %89, i32 3 > %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 > %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %19) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 > %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %20) > %100 = extractelement <4 x float> %99, i32 0 > %101 = extractelement <4 x float> %99, i32 1 > %102 = extractelement <4 x float> %99, i32 2 > %103 = fmul float %102, 0x406FE01000000000 > %104 = fmul float %101, 0x406FE01000000000 > %105 = fmul float %100, 0x406FE01000000000 > %106 = fptosi float %103 to i32 > %107 = fptosi float %104 to i32 > %108 = fptosi float %105 to i32 > %109 = shl i32 %106, 1 > %110 = or i32 %109, 1 > %111 = shl i32 %107, 1 > %112 = or i32 %111, 1 > %113 = shl i32 %108, 1 > %114 = or i32 %113, 1 > %115 = shl i32 %106, 5 > %116 = or i32 %115, 4 > %117 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %116) > %118 = fmul float %94, %117 > %119 = shl i32 %107, 5 > %120 = or i32 %119, 4 > %121 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %120) > %122 = fmul float %95, %121 > %123 = shl i32 %110, 4 > %124 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %123) > %125 = shl i32 %110, 4 > %126 = or i32 %125, 12 > %127 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %126) > %128 = fmul float %124, %127 > %129 = shl i32 %110, 4 > %130 = or i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %130) > %132 = shl i32 %110, 4 > %133 = or i32 %132, 8 > %134 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %133) > %135 = fsub float -0.000000e+00, %128 > %136 = call float @llvm.fma.f32(float %131, float %134, float %135) > %137 = shl i32 %110, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %138) > %140 = shl i32 %110, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %141) > %143 = call float @llvm.fma.f32(float %139, float %142, float %128) > %144 = fmul float %143, %94 > %145 = fmul float %136, %94 > %146 = fmul float %145, 2.000000e+00 > %147 = shl i32 %112, 4 > %148 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %147) > %149 = shl i32 %112, 4 > %150 = or i32 %149, 12 > %151 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %150) > %152 = fmul float %148, %151 > %153 = shl i32 %112, 4 > %154 = or i32 %153, 4 > %155 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %154) > %156 = shl i32 %112, 4 > %157 = or i32 %156, 8 > %158 = call float 
@llvm.SI.load.const(<16 x i8> %57, i32 %157) > %159 = fsub float -0.000000e+00, %152 > %160 = call float @llvm.fma.f32(float %155, float %158, float %159) > %161 = shl i32 %112, 4 > %162 = or i32 %161, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %162) > %164 = shl i32 %112, 4 > %165 = or i32 %164, 8 > %166 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %165) > %167 = call float @llvm.fma.f32(float %163, float %166, float %152) > %168 = fmul float %167, %95 > %169 = fmul float %168, 2.000000e+00 > %170 = fmul float %160, %95 > %171 = fmul float %170, 2.000000e+00 > %172 = shl i32 %110, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %173) > %175 = shl i32 %110, 4 > %176 = or i32 %175, 8 > %177 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %176) > %178 = shl i32 %110, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %178) > %180 = shl i32 %110, 4 > %181 = or i32 %180, 12 > %182 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %181) > %183 = fmul float %177, %182 > %184 = fmul float %177, %179 > %185 = fmul float %174, %182 > %186 = shl i32 %110, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %186) > %188 = shl i32 %110, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %189) > %191 = call float @llvm.fma.f32(float %187, float %190, float %183) > %192 = fmul float %191, %94 > %193 = fmul float %192, 2.000000e+00 > %194 = shl i32 %110, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %194) > %196 = shl i32 %110, 4 > %197 = or i32 %196, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %197) > %199 = shl i32 %110, 4 > %200 = or i32 %199, 8 > %201 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %200) > %202 = shl i32 %110, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %202) > %204 = shl i32 %110, 4 > %205 = or i32 %204, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %205) > %207 = shl i32 %110, 4 > %208 = or i32 %207, 8 > %209 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %208) > %210 = fmul float %195, %203 > %211 = fmul float %198, %206 > %212 = fmul float %201, %209 > %213 = fadd float %212, %211 > %214 = fadd float %212, %210 > %215 = fadd float %211, %210 > %216 = fsub float -0.000000e+00, %213 > %217 = call float @llvm.fma.f32(float %216, float 2.000000e+00, float 1.000000e+00) > %218 = fsub float -0.000000e+00, %214 > %219 = call float @llvm.fma.f32(float %218, float 2.000000e+00, float 1.000000e+00) > %220 = fsub float -0.000000e+00, %215 > %221 = call float @llvm.fma.f32(float %220, float 2.000000e+00, float 1.000000e+00) > %222 = fmul float %94, %219 > %223 = shl i32 %112, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %224) > %226 = shl i32 %112, 4 > %227 = or i32 %226, 8 > %228 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %227) > %229 = shl i32 %112, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %229) > %231 = shl i32 %112, 4 > %232 = or i32 %231, 12 > %233 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %232) > %234 = fmul float %228, %233 > %235 = fmul float %228, %230 > %236 = fmul float %225, %233 > %237 = shl i32 %112, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %237) > %239 = shl i32 %112, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %240) > %242 = call float @llvm.fma.f32(float %238, float %241, float %234) > %243 = fmul float %242, %95 > %244 = fmul float %243, 
2.000000e+00 > %245 = shl i32 %112, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %245) > %247 = shl i32 %112, 4 > %248 = or i32 %247, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %248) > %250 = shl i32 %112, 4 > %251 = or i32 %250, 8 > %252 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %251) > %253 = shl i32 %112, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %253) > %255 = shl i32 %112, 4 > %256 = or i32 %255, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %256) > %258 = shl i32 %112, 4 > %259 = or i32 %258, 8 > %260 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %259) > %261 = fmul float %246, %254 > %262 = fmul float %249, %257 > %263 = fmul float %252, %260 > %264 = fadd float %263, %262 > %265 = fadd float %263, %261 > %266 = fadd float %262, %261 > %267 = fsub float -0.000000e+00, %264 > %268 = call float @llvm.fma.f32(float %267, float 2.000000e+00, float 1.000000e+00) > %269 = fsub float -0.000000e+00, %265 > %270 = call float @llvm.fma.f32(float %269, float 2.000000e+00, float 1.000000e+00) > %271 = fsub float -0.000000e+00, %266 > %272 = call float @llvm.fma.f32(float %271, float 2.000000e+00, float 1.000000e+00) > %273 = fmul float %95, %270 > %274 = fadd float %193, %244 > %275 = fadd float %222, %273 > %276 = fadd float %146, %171 > %277 = fadd float %118, %122 > %278 = shl i32 %108, 5 > %279 = or i32 %278, 4 > %280 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %279) > %281 = fmul float %96, %280 > %282 = shl i32 %114, 4 > %283 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %282) > %284 = shl i32 %114, 4 > %285 = or i32 %284, 12 > %286 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %285) > %287 = fmul float %283, %286 > %288 = shl i32 %114, 4 > %289 = or i32 %288, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %289) > %291 = shl i32 %114, 4 > %292 = or i32 %291, 8 > %293 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %292) > %294 = fsub float -0.000000e+00, %287 > %295 = call float @llvm.fma.f32(float %290, float %293, float %294) > %296 = shl i32 %114, 4 > %297 = or i32 %296, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %297) > %299 = shl i32 %114, 4 > %300 = or i32 %299, 8 > %301 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %300) > %302 = call float @llvm.fma.f32(float %298, float %301, float %287) > %303 = fmul float %302, %96 > %304 = fmul float %303, 2.000000e+00 > %305 = fmul float %295, %96 > %306 = fmul float %305, 2.000000e+00 > %307 = shl i32 %114, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %308) > %310 = shl i32 %114, 4 > %311 = or i32 %310, 8 > %312 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %311) > %313 = shl i32 %114, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %313) > %315 = shl i32 %114, 4 > %316 = or i32 %315, 12 > %317 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %316) > %318 = fmul float %312, %317 > %319 = fmul float %312, %314 > %320 = fmul float %309, %317 > %321 = shl i32 %114, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %321) > %323 = shl i32 %114, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %324) > %326 = call float @llvm.fma.f32(float %322, float %325, float %318) > %327 = fmul float %326, %96 > %328 = fmul float %327, 2.000000e+00 > %329 = shl i32 %114, 4 > %330 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %329) > %331 = shl i32 %114, 4 > %332 = or i32 %331, 4 > %333 = call float 
@llvm.SI.load.const(<16 x i8> %57, i32 %332) > %334 = shl i32 %114, 4 > %335 = or i32 %334, 8 > %336 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %335) > %337 = shl i32 %114, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %337) > %339 = shl i32 %114, 4 > %340 = or i32 %339, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %340) > %342 = shl i32 %114, 4 > %343 = or i32 %342, 8 > %344 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %343) > %345 = fmul float %330, %338 > %346 = fmul float %333, %341 > %347 = fmul float %336, %344 > %348 = fadd float %347, %346 > %349 = fadd float %347, %345 > %350 = fadd float %346, %345 > %351 = fsub float -0.000000e+00, %348 > %352 = call float @llvm.fma.f32(float %351, float 2.000000e+00, float 1.000000e+00) > %353 = fsub float -0.000000e+00, %349 > %354 = call float @llvm.fma.f32(float %353, float 2.000000e+00, float 1.000000e+00) > %355 = fsub float -0.000000e+00, %350 > %356 = call float @llvm.fma.f32(float %355, float 2.000000e+00, float 1.000000e+00) > %357 = fmul float %96, %354 > %358 = fadd float %274, %328 > %359 = fadd float %275, %357 > %360 = fadd float %276, %306 > %361 = fadd float %277, %281 > %362 = fmul float %358, %61 > %363 = fmul float %359, %62 > %364 = fadd float %362, %363 > %365 = fmul float %360, %63 > %366 = fadd float %364, %365 > %367 = fadd float %366, %361 > %368 = shl i32 %110, 4 > %369 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %368) > %370 = shl i32 %110, 4 > %371 = or i32 %370, 8 > %372 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %371) > %373 = fsub float -0.000000e+00, %185 > %374 = call float @llvm.fma.f32(float %369, float %372, float %373) > %375 = fmul float %374, %94 > %376 = fmul float %375, 2.000000e+00 > %377 = fmul float %144, 2.000000e+00 > %378 = shl i32 %112, 4 > %379 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %378) > %380 = shl i32 %112, 4 > %381 = or i32 %380, 8 > %382 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %381) > %383 = fsub float -0.000000e+00, %236 > %384 = call float @llvm.fma.f32(float %379, float %382, float %383) > %385 = fmul float %384, %95 > %386 = fmul float %385, 2.000000e+00 > %387 = fmul float %94, %221 > %388 = fmul float %94, %217 > %389 = fmul float %95, %272 > %390 = fmul float %95, %268 > %391 = shl i32 %106, 5 > %392 = or i32 %391, 8 > %393 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %392) > %394 = fmul float %94, %393 > %395 = shl i32 %107, 5 > %396 = or i32 %395, 8 > %397 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %396) > %398 = fmul float %95, %397 > %399 = fadd float %386, %376 > %400 = fadd float %169, %377 > %401 = fadd float %389, %387 > %402 = fadd float %398, %394 > %403 = shl i32 %114, 4 > %404 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %403) > %405 = shl i32 %114, 4 > %406 = or i32 %405, 8 > %407 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %406) > %408 = fsub float -0.000000e+00, %320 > %409 = call float @llvm.fma.f32(float %404, float %407, float %408) > %410 = fmul float %409, %96 > %411 = fmul float %410, 2.000000e+00 > %412 = fmul float %96, %356 > %413 = fmul float %96, %352 > %414 = shl i32 %108, 5 > %415 = or i32 %414, 8 > %416 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %415) > %417 = fmul float %96, %416 > %418 = fadd float %399, %411 > %419 = fadd float %400, %304 > %420 = fadd float %401, %412 > %421 = fadd float %402, %417 > %422 = fmul float %418, %61 > %423 = fmul float %419, %62 > %424 = fadd float %422, %423 > %425 = fmul float %420, %63 > %426 
= fadd float %424, %425 > %427 = fadd float %426, %421 > %428 = shl i32 %106, 5 > %429 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %428) > %430 = fmul float %94, %429 > %431 = shl i32 %107, 5 > %432 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %431) > %433 = fmul float %95, %432 > %434 = shl i32 %108, 5 > %435 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %434) > %436 = fmul float %96, %435 > %437 = shl i32 %110, 4 > %438 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %437) > %439 = shl i32 %110, 4 > %440 = or i32 %439, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %440) > %442 = fsub float -0.000000e+00, %183 > %443 = call float @llvm.fma.f32(float %438, float %441, float %442) > %444 = fadd float %185, %184 > %445 = fmul float %443, %94 > %446 = fmul float %444, %94 > %447 = fmul float %445, 2.000000e+00 > %448 = fmul float %446, 2.000000e+00 > %449 = shl i32 %112, 4 > %450 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %449) > %451 = shl i32 %112, 4 > %452 = or i32 %451, 4 > %453 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %452) > %454 = fsub float -0.000000e+00, %234 > %455 = call float @llvm.fma.f32(float %450, float %453, float %454) > %456 = shl i32 %114, 4 > %457 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %456) > %458 = shl i32 %114, 4 > %459 = or i32 %458, 4 > %460 = call float @llvm.SI.load.const(<16 x i8> %57, i32 %459) > %461 = fsub float -0.000000e+00, %318 > %462 = call float @llvm.fma.f32(float %457, float %460, float %461) > %463 = fadd float %320, %319 > %464 = fmul float %455, %95 > %465 = fmul float %462, %96 > %466 = fmul float %463, %96 > %467 = fmul float %465, 2.000000e+00 > %468 = fmul float %466, 2.000000e+00 > %469 = fadd float %236, %235 > %470 = fmul float %469, %95 > %471 = fmul float %464, 2.000000e+00 > %472 = fmul float %470, 2.000000e+00 > %473 = fadd float %388, %390 > %474 = fadd float %447, %471 > %475 = fadd float %448, %472 > %476 = fadd float %430, %433 > %477 = fadd float %413, %473 > %478 = fadd float %467, %474 > %479 = fadd float %468, %475 > %480 = fadd float %436, %476 > %481 = fmul float %477, %61 > %482 = fmul float %478, %62 > %483 = fadd float %481, %482 > %484 = fmul float %479, %63 > %485 = fadd float %483, %484 > %486 = fadd float %485, %480 > %487 = fmul float %36, %486 > %488 = fmul float %37, %367 > %489 = fadd float %487, %488 > %490 = fmul float %38, %427 > %491 = fadd float %489, %490 > %492 = fadd float %491, %39 > %493 = fadd float %492, %23 > %494 = fmul float %28, %486 > %495 = fmul float %29, %367 > %496 = fadd float %494, %495 > %497 = fmul float %30, %427 > %498 = fadd float %496, %497 > %499 = fadd float %498, %31 > %500 = fmul float %32, %486 > %501 = fmul float %33, %367 > %502 = fadd float %500, %501 > %503 = fmul float %34, %427 > %504 = fadd float %502, %503 > %505 = fadd float %504, %35 > %506 = fmul float %40, %486 > %507 = fmul float %41, %367 > %508 = fadd float %506, %507 > %509 = fmul float %42, %427 > %510 = fadd float %508, %509 > %511 = fadd float %510, %43 > %512 = fsub float %53, %486 > %513 = fsub float %54, %367 > %514 = fsub float %55, %427 > %515 = fmul float %44, %512 > %516 = fmul float %45, %513 > %517 = fadd float %516, %515 > %518 = fmul float %46, %514 > %519 = fadd float %517, %518 > %520 = fmul float %47, %512 > %521 = fmul float %48, %513 > %522 = fadd float %521, %520 > %523 = fmul float %49, %514 > %524 = fadd float %522, %523 > %525 = fmul float %50, %512 > %526 = fmul float %51, %513 > %527 = fadd float %526, %525 > %528 = fmul 
float %52, %514 > %529 = fadd float %527, %528 > %530 = fmul float %358, %78 > %531 = fmul float %359, %79 > %532 = fadd float %531, %530 > %533 = fmul float %360, %80 > %534 = fadd float %532, %533 > %535 = fmul float %418, %78 > %536 = fmul float %419, %79 > %537 = fadd float %536, %535 > %538 = fmul float %420, %80 > %539 = fadd float %537, %538 > %540 = fmul float %477, %78 > %541 = fmul float %478, %79 > %542 = fadd float %541, %540 > %543 = fmul float %479, %80 > %544 = fadd float %542, %543 > %545 = fmul float %544, %544 > %546 = fmul float %534, %534 > %547 = fadd float %546, %545 > %548 = fmul float %539, %539 > %549 = fadd float %547, %548 > %550 = call float @llvm.AMDGPU.rsq.clamped.f32(float %549) > %551 = fmul float %550, %544 > %552 = fmul float %550, %534 > %553 = fmul float %550, %539 > %554 = fmul float %44, %551 > %555 = fmul float %45, %552 > %556 = fadd float %555, %554 > %557 = fmul float %46, %553 > %558 = fadd float %556, %557 > %559 = fmul float %358, %84 > %560 = fmul float %359, %85 > %561 = fadd float %560, %559 > %562 = fmul float %360, %86 > %563 = fadd float %561, %562 > %564 = fmul float %358, %67 > %565 = fmul float %359, %68 > %566 = fadd float %565, %564 > %567 = fmul float %360, %69 > %568 = fadd float %566, %567 > %569 = fmul float %418, %84 > %570 = fmul float %419, %85 > %571 = fadd float %570, %569 > %572 = fmul float %420, %86 > %573 = fadd float %571, %572 > %574 = fmul float %418, %67 > %575 = fmul float %419, %68 > %576 = fadd float %575, %574 > %577 = fmul float %420, %69 > %578 = fadd float %576, %577 > %579 = fmul float %477, %84 > %580 = fmul float %478, %85 > %581 = fadd float %580, %579 > %582 = fmul float %479, %86 > %583 = fadd float %581, %582 > %584 = fmul float %477, %67 > %585 = fmul float %478, %68 > %586 = fadd float %585, %584 > %587 = fmul float %479, %69 > %588 = fadd float %586, %587 > %589 = fmul float %583, %583 > %590 = fmul float %563, %563 > %591 = fadd float %590, %589 > %592 = fmul float %573, %573 > %593 = fadd float %591, %592 > %594 = call float @llvm.AMDGPU.rsq.clamped.f32(float %593) > %595 = fmul float %594, %583 > %596 = fmul float %594, %563 > %597 = fmul float %594, %573 > %598 = fmul float %44, %595 > %599 = fmul float %45, %596 > %600 = fadd float %599, %598 > %601 = fmul float %46, %597 > %602 = fadd float %600, %601 > %603 = fmul float %588, %588 > %604 = fmul float %568, %568 > %605 = fadd float %604, %603 > %606 = fmul float %578, %578 > %607 = fadd float %605, %606 > %608 = call float @llvm.AMDGPU.rsq.clamped.f32(float %607) > %609 = fmul float %608, %588 > %610 = fmul float %608, %568 > %611 = fmul float %608, %578 > %612 = fmul float %44, %609 > %613 = fmul float %45, %610 > %614 = fadd float %613, %612 > %615 = fmul float %46, %611 > %616 = fadd float %614, %615 > %617 = fmul float %47, %551 > %618 = fmul float %48, %552 > %619 = fadd float %618, %617 > %620 = fmul float %49, %553 > %621 = fadd float %619, %620 > %622 = fmul float %50, %551 > %623 = fmul float %51, %552 > %624 = fadd float %623, %622 > %625 = fmul float %52, %553 > %626 = fadd float %624, %625 > %627 = fmul float %47, %595 > %628 = fmul float %48, %596 > %629 = fadd float %628, %627 > %630 = fmul float %49, %597 > %631 = fadd float %629, %630 > %632 = fmul float %50, %595 > %633 = fmul float %51, %596 > %634 = fadd float %633, %632 > %635 = fmul float %52, %597 > %636 = fadd float %634, %635 > %637 = fmul float %47, %609 > %638 = fmul float %48, %610 > %639 = fadd float %638, %637 > %640 = fmul float %49, %611 > %641 = fadd float %639, 
%640 > %642 = fmul float %50, %609 > %643 = fmul float %51, %610 > %644 = fadd float %643, %642 > %645 = fmul float %52, %611 > %646 = fadd float %644, %645 > %647 = fcmp oeq float %26, 0.000000e+00 > %648 = fcmp oeq float %27, 0.000000e+00 > %649 = fdiv float 1.000000e+00, %26 > %650 = fdiv float 1.000000e+00, %27 > %651 = select i1 %647, float 0x4600000000000000, float %649 > %652 = select i1 %648, float 0x4600000000000000, float %650 > %653 = call float @llvm.fma.f32(float %651, float %61, float %24) > %654 = call float @llvm.fma.f32(float %652, float %62, float %25) > %655 = fsub float -0.000000e+00, %653 > %656 = fsub float -0.000000e+00, %654 > %657 = bitcast i32 %11 to float > %658 = insertvalue <{ float, float, float }> undef, float %657, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %63, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %519, float %524, float %529, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %558, float %602, float %616, float %151) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %621, float %631, float %641, float %361) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %626, float %636, float %646, float %511) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %655, float %656, float 0.000000e+00, float %90) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %499, float %505, float %493, float %511) > ret <{ float, float, float }> %658 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], BUFFER, FLOAT >DCL CONST[1][0..38] >DCL TEMP[0..7], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 416, 320, 272} >IMM[2] FLT32 {158456325028528675187087900672.0000, 0.1250, -2.0000, 3.0000} >IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} >IMM[4] UINT32 {288, 608, 240, 304} >IMM[5] UINT32 {256, 592, 464, 448} >IMM[6] UINT32 {336, 0, 0, 0} >IMM[7] INT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, 
TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[2].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[4].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz > 17: RSQ TEMP[2].x, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[1].xyzz > 19: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz > 20: MOV TEMP[2].xy, IN[0].xyyy > 21: TEX TEMP[2], TEMP[2], SAMP[1], 2D > 22: ABS TEMP[3].x, TEMP[0].xxxx > 23: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].wwww > 24: ABS TEMP[4].x, TEMP[0].xxxx > 25: ADD TEMP[0].x, TEMP[4].xxxx, -CONST[1][26].xxxx > 26: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 27: FSNE TEMP[4].x, TEMP[2].wwww, IMM[0].wwww > 28: UIF TEMP[4].xxxx :0 > 29: RCP TEMP[4].x, TEMP[2].wwww > 30: MUL TEMP[4].x, TEMP[3].xxxx, TEMP[4].xxxx > 31: ELSE :0 > 32: SSG TEMP[3].x, TEMP[3].xxxx > 33: MUL TEMP[4].x, IMM[2].xxxx, TEMP[3].xxxx > 34: ENDIF > 35: MOV_SAT TEMP[3].x, TEMP[4].xxxx > 36: LG2 TEMP[3].x, TEMP[3].xxxx > 37: MOV TEMP[4].xy, IN[0].xyyy > 38: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D > 39: MUL TEMP[5].x, TEMP[4].zzzz, TEMP[4].zzzz > 40: MUL TEMP[6].x, TEMP[5].xxxx, CONST[1][20].zzzz > 41: FMA TEMP[5].x, TEMP[5].xxxx, CONST[1][20].zzzz, IMM[0].xxxx > 42: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy > 43: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[6].xxxx > 44: EX2 TEMP[3].x, TEMP[3].xxxx > 45: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx > 46: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx > 47: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww > 48: ADD TEMP[5].x, -TEMP[3].xxxx, IMM[0].zzzz > 49: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[1][17].xyzz > 50: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[1][20].wwww > 51: DP3 TEMP[6].x, TEMP[2].xyzz, IMM[3].xyzz > 52: ADD TEMP[6].xyz, -TEMP[2].xyzz, TEMP[6].xxxx > 53: FMA TEMP[7].xyz, CONST[1][18].zzzz, TEMP[6].xyzz, TEMP[2].xyzz > 54: MUL TEMP[1].xyz, TEMP[7].xyzz, CONST[1][38].xyzz > 55: MUL TEMP[6].xyz, TEMP[1].xyzz, CONST[1][15].yzww > 56: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][19].xxxx > 57: MUL TEMP[6].xyz, TEMP[6].xyzz, CONST[1][18].xxxx > 58: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[6].xyzz > 59: FMA TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].yyyy, TEMP[5].xyzz > 60: MUL TEMP[2].xyz, CONST[1][16].xyzz, CONST[1][19].xxxx > 61: FMA TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz, -TEMP[3].xyzz > 62: ADD TEMP[2].x, CONST[1][20].xxxx, IMM[0].zzzz > 63: FMA TEMP[2].x, TEMP[2].xxxx, CONST[1][19].yyyy, IN[5].wwww > 64: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 65: FSNE TEMP[4].x, CONST[1][20].xxxx, IMM[0].wwww > 66: UIF TEMP[4].xxxx :0 > 67: RCP TEMP[4].x, CONST[1][20].xxxx > 68: ELSE :0 > 69: MOV TEMP[4].x, IMM[2].xxxx > 70: ENDIF > 71: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 72: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 73: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].zzzz, IMM[2].wwww > 74: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx > 75: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 76: LG2 TEMP[2].x, TEMP[2].xxxx > 77: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][19].wwww > 78: EX2 TEMP[2].x, TEMP[2].xxxx > 79: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 80: FMA TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xyzz, TEMP[3].xyzz > 81: DP3 TEMP[1].x, TEMP[3].xyzz, CONST[1][37].xyzz > 82: ADD TEMP[4].x, -CONST[1][26].xxxx, CONST[1][26].yyyy > 83: FSNE TEMP[5].x, TEMP[4].xxxx, IMM[0].wwww > 84: UIF TEMP[5].xxxx :0 > 85: RCP TEMP[4].x, 
TEMP[4].xxxx > 86: ELSE :0 > 87: MOV TEMP[4].x, IMM[2].xxxx > 88: ENDIF > 89: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx > 90: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 91: FMA TEMP[5].x, TEMP[4].xxxx, IMM[2].zzzz, IMM[2].wwww > 92: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[4].xxxx > 93: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx > 94: MAX TEMP[0].x, TEMP[0].xxxx, CONST[1][17].wwww > 95: LG2 TEMP[4].x, TEMP[0].xxxx > 96: MUL TEMP[0].x, TEMP[4].xxxx, CONST[1][26].zzzz > 97: EX2 TEMP[0].x, TEMP[0].xxxx > 98: FMA TEMP[4].x, TEMP[1].xxxx, CONST[1][18].wwww, TEMP[0].xxxx > 99: ADD TEMP[1].x, -TEMP[4].xxxx, CONST[1][19].zzzz >100: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx, TEMP[4].xxxx >101: ADD TEMP[1].x, CONST[1][29].yyyy, IMM[0].zzzz >102: MOV TEMP[4].xy, IN[5].xyyy >103: TEX TEMP[4].x, TEMP[4], SAMP[3], 2D >104: FMA TEMP[5].x, CONST[1][29].xxxx, TEMP[1].xxxx, TEMP[4].xxxx >105: ADD TEMP[1].x, TEMP[5].xxxx, IMM[0].yyyy >106: FSNE TEMP[5].x, CONST[1][29].yyyy, IMM[0].wwww >107: UIF TEMP[5].xxxx :0 >108: RCP TEMP[5].x, CONST[1][29].yyyy >109: ELSE :0 >110: MOV TEMP[5].x, IMM[2].xxxx >111: ENDIF >112: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx >113: MOV_SAT TEMP[5].x, TEMP[5].xxxx >114: FMA TEMP[6].x, TEMP[5].xxxx, IMM[2].zzzz, IMM[2].wwww >115: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[5].xxxx >116: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >117: ADD TEMP[5].x, CONST[1][28].yyyy, IMM[0].zzzz >118: FMA TEMP[4].x, CONST[1][28].xxxx, TEMP[5].xxxx, TEMP[4].xxxx >119: ADD TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy >120: FSNE TEMP[5].x, CONST[1][28].yyyy, IMM[0].wwww >121: UIF TEMP[5].xxxx :0 >122: RCP TEMP[5].x, CONST[1][28].yyyy >123: ELSE :0 >124: MOV TEMP[5].x, IMM[2].xxxx >125: ENDIF >126: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx >127: MOV_SAT TEMP[4].x, TEMP[4].xxxx >128: FMA TEMP[5].x, TEMP[4].xxxx, IMM[2].zzzz, IMM[2].wwww >129: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx >130: FMA TEMP[4].x, -TEMP[5].xxxx, TEMP[4].xxxx, IMM[0].zzzz >131: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx >132: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][20].yyyy >133: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][38].wwww >134: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx >135: MOV TEMP[2].w, TEMP[2].xxxx >136: MUL TEMP[0].x, CONST[1][21].yyyy, CONST[1][21].yyyy >137: MIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz >138: ADD TEMP[1].x, CONST[1][21].xxxx, IMM[0].yyyy >139: FMA TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx, IMM[0].zzzz >140: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[3].xyzz >141: MOV TEMP[1].x, IMM[7].xxxx >142: MOV TEMP[1].w, IMM[1].xxxx >143: TXF TEMP[1].x, TEMP[1], SAMP[4], BUFFER >144: MUL TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz >145: MOV OUT[0], TEMP[2] >146: END >radeonsi: Compiling shader 169 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %26 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 248) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 452) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 592) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 596) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 600) > %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 608) > %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 612) > %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 616) > %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 620) > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 3 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 > %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 7 > %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 > %76 = extractelement <8 x i32> %72, i32 7 > %77 = extractelement <4 x i32> %75, i32 0 > %78 = and i32 %77, %76 > %79 = insertelement <4 x i32> %75, i32 %78, i32 0 > %80 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 > %82 = 
bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %83 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %82, i64 0, i64 11 > %84 = load <4 x i32>, <4 x i32> addrspace(2)* %83, align 16, !tbaa !0 > %85 = extractelement <8 x i32> %81, i32 7 > %86 = extractelement <4 x i32> %84, i32 0 > %87 = and i32 %86, %85 > %88 = insertelement <4 x i32> %84, i32 %87, i32 0 > %89 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %90 = load <8 x i32>, <8 x i32> addrspace(2)* %89, align 32, !tbaa !0 > %91 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %92 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %91, i64 0, i64 15 > %93 = load <4 x i32>, <4 x i32> addrspace(2)* %92, align 16, !tbaa !0 > %94 = extractelement <8 x i32> %90, i32 7 > %95 = extractelement <4 x i32> %93, i32 0 > %96 = and i32 %95, %94 > %97 = insertelement <4 x i32> %93, i32 %96, i32 0 > %98 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %99 = bitcast <8 x i32> addrspace(2)* %98 to <2 x i128> addrspace(2)* > %100 = load <2 x i128>, <2 x i128> addrspace(2)* %99, align 32, !tbaa !0 > %101 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %102 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %103 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %104 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %105 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %106 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %107 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %108 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %109 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %110 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %111 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %112 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %113 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %114 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %115 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %116 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %117 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %118 = bitcast float %101 to i32 > %119 = bitcast float %102 to i32 > %120 = insertelement <2 x i32> undef, i32 %118, i32 0 > %121 = insertelement <2 x i32> %120, i32 %119, i32 1 > %122 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %121, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %123 = extractelement <4 x float> %122, i32 1 > %124 = extractelement <4 x float> %122, i32 3 > %125 = call float @llvm.fma.f32(float %123, float 2.000000e+00, float -1.000000e+00) > %126 = call float @llvm.fma.f32(float %124, float 2.000000e+00, float -1.000000e+00) > %127 = fsub float -0.000000e+00, %125 > %128 = call float @llvm.fma.f32(float %127, float %125, float 1.000000e+00) > %129 = fsub float -0.000000e+00, %126 > %130 = call float @llvm.fma.f32(float %129, float %126, float %128) > %131 = call float @llvm.sqrt.f32(float %130) > %132 = fmul float %106, %125 > %133 = fmul float %107, %126 > %134 = fadd float %133, %132 > %135 = fmul float %108, %131 > %136 = fadd float %134, %135 > %137 = fmul float %109, %125 > %138 
= fmul float %110, %126 > %139 = fadd float %138, %137 > %140 = fmul float %111, %131 > %141 = fadd float %139, %140 > %142 = fmul float %112, %125 > %143 = fmul float %113, %126 > %144 = fadd float %143, %142 > %145 = fmul float %114, %131 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %141, %141 > %149 = fadd float %148, %147 > %150 = fmul float %146, %146 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %141 > %155 = fmul float %152, %146 > %156 = fmul float %103, %103 > %157 = fmul float %104, %104 > %158 = fadd float %157, %156 > %159 = fmul float %105, %105 > %160 = fadd float %158, %159 > %161 = call float @llvm.AMDGPU.rsq.clamped.f32(float %160) > %162 = fmul float %161, %103 > %163 = fmul float %161, %104 > %164 = fmul float %161, %105 > %165 = fmul float %153, %162 > %166 = fmul float %154, %163 > %167 = fadd float %166, %165 > %168 = fmul float %155, %164 > %169 = fadd float %167, %168 > %170 = bitcast float %101 to i32 > %171 = bitcast float %102 to i32 > %172 = insertelement <2 x i32> undef, i32 %170, i32 0 > %173 = insertelement <2 x i32> %172, i32 %171, i32 1 > %174 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %173, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %175 = extractelement <4 x float> %174, i32 0 > %176 = extractelement <4 x float> %174, i32 1 > %177 = extractelement <4 x float> %174, i32 2 > %178 = extractelement <4 x float> %174, i32 3 > %179 = call float @llvm.fabs.f32(float %169) > %180 = fadd float %179, %178 > %181 = call float @llvm.fabs.f32(float %169) > %182 = fsub float %181, %48 > %183 = fadd float %180, -1.000000e+00 > %184 = fcmp une float %178, 0.000000e+00 > br i1 %184, label %IF, label %ELSE > >IF: ; preds = %main_body > %185 = fdiv float 1.000000e+00, %178 > %186 = fmul float %183, %185 > br label %ENDIF > >ELSE: ; preds = %main_body > %187 = fcmp ogt float %183, 0.000000e+00 > %188 = select i1 %187, float 1.000000e+00, float %183 > %189 = fcmp oge float %188, 0.000000e+00 > %.op = fmul float %188, 0x4600000000000000 > %190 = select i1 %189, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp16.0 = phi float [ %186, %IF ], [ %190, %ELSE ] > %191 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %192 = call float @llvm.log2.f32(float %191) > %193 = bitcast float %101 to i32 > %194 = bitcast float %102 to i32 > %195 = insertelement <2 x i32> undef, i32 %193, i32 0 > %196 = insertelement <2 x i32> %195, i32 %194, i32 1 > %197 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %196, <8 x i32> %81, <4 x i32> %88, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %198 = extractelement <4 x float> %197, i32 0 > %199 = extractelement <4 x float> %197, i32 1 > %200 = extractelement <4 x float> %197, i32 2 > %201 = fmul float %200, %200 > %202 = fmul float %201, %44 > %203 = call float @llvm.fma.f32(float %201, float %44, float 2.000000e+00) > %204 = fmul float %203, 1.250000e-01 > %205 = fmul float %192, %202 > %206 = call float @llvm.exp2.f32(float %205) > %207 = fmul float %206, %204 > %208 = fmul float %198, %207 > %209 = call float @llvm.maxnum.f32(float %208, float 0.000000e+00) > %210 = fsub float 1.000000e+00, %209 > %211 = fmul float %209, %31 > %212 = fmul float %209, %32 > %213 = fmul float %209, %33 > %214 = fmul float %211, %45 > %215 = fmul float %212, %45 > 
%216 = fmul float %213, %45 > %217 = fmul float %175, 0x3FD3333340000000 > %218 = fmul float %176, 0x3FE2E147A0000000 > %219 = fadd float %218, %217 > %220 = fmul float %177, 0x3FBC28F5C0000000 > %221 = fadd float %219, %220 > %222 = fsub float %221, %175 > %223 = fsub float %221, %176 > %224 = fsub float %221, %177 > %225 = call float @llvm.fma.f32(float %36, float %222, float %175) > %226 = call float @llvm.fma.f32(float %36, float %223, float %176) > %227 = call float @llvm.fma.f32(float %36, float %224, float %177) > %228 = fmul float %225, %58 > %229 = fmul float %226, %59 > %230 = fmul float %227, %60 > %231 = fmul float %228, %25 > %232 = fmul float %229, %26 > %233 = fmul float %230, %27 > %234 = fmul float %228, %38 > %235 = fmul float %229, %38 > %236 = fmul float %230, %38 > %237 = fmul float %231, %35 > %238 = fmul float %232, %35 > %239 = fmul float %233, %35 > %240 = fmul float %210, %237 > %241 = fmul float %210, %238 > %242 = fmul float %210, %239 > %243 = call float @llvm.fma.f32(float %214, float %199, float %240) > %244 = call float @llvm.fma.f32(float %215, float %199, float %241) > %245 = call float @llvm.fma.f32(float %216, float %199, float %242) > %246 = fmul float %28, %38 > %247 = fmul float %29, %38 > %248 = fmul float %30, %38 > %249 = fsub float -0.000000e+00, %243 > %250 = call float @llvm.fma.f32(float %234, float %246, float %249) > %251 = fsub float -0.000000e+00, %244 > %252 = call float @llvm.fma.f32(float %235, float %247, float %251) > %253 = fsub float -0.000000e+00, %245 > %254 = call float @llvm.fma.f32(float %236, float %248, float %253) > %255 = fadd float %42, 1.000000e+00 > %256 = call float @llvm.fma.f32(float %255, float %39, float %117) > %257 = fadd float %256, -1.000000e+00 > %258 = fcmp une float %42, 0.000000e+00 > %259 = fdiv float 1.000000e+00, %42 > %temp16.1 = select i1 %258, float %259, float 0x4600000000000000 > %260 = fmul float %257, %temp16.1 > %261 = call float @llvm.AMDGPU.clamp.(float %260, float 0.000000e+00, float 1.000000e+00) > %262 = call float @llvm.fma.f32(float %261, float -2.000000e+00, float 3.000000e+00) > %263 = fmul float %261, %261 > %264 = fmul float %263, %262 > %265 = call float @llvm.log2.f32(float %264) > %266 = fmul float %265, %41 > %267 = call float @llvm.exp2.f32(float %266) > %268 = call float @llvm.minnum.f32(float %267, float 1.000000e+00) > %269 = call float @llvm.fma.f32(float %268, float %250, float %243) > %270 = call float @llvm.fma.f32(float %268, float %252, float %244) > %271 = call float @llvm.fma.f32(float %268, float %254, float %245) > %272 = fmul float %269, %55 > %273 = fmul float %270, %56 > %274 = fadd float %273, %272 > %275 = fmul float %271, %57 > %276 = fadd float %274, %275 > %277 = fsub float %49, %48 > %278 = fcmp une float %277, 0.000000e+00 > %279 = fdiv float 1.000000e+00, %277 > %temp16.2 = select i1 %278, float %279, float 0x4600000000000000 > %280 = fmul float %182, %temp16.2 > %281 = call float @llvm.AMDGPU.clamp.(float %280, float 0.000000e+00, float 1.000000e+00) > %282 = call float @llvm.fma.f32(float %281, float -2.000000e+00, float 3.000000e+00) > %283 = fmul float %281, %281 > %284 = fmul float %283, %282 > %285 = call float @llvm.maxnum.f32(float %284, float %34) > %286 = call float @llvm.log2.f32(float %285) > %287 = fmul float %286, %50 > %288 = call float @llvm.exp2.f32(float %287) > %289 = call float @llvm.fma.f32(float %276, float %37, float %288) > %290 = fsub float %40, %289 > %291 = call float @llvm.fma.f32(float %268, float %290, float %289) > %292 = fadd 
float %54, 1.000000e+00 > %293 = bitcast float %115 to i32 > %294 = bitcast float %116 to i32 > %295 = insertelement <2 x i32> undef, i32 %293, i32 0 > %296 = insertelement <2 x i32> %295, i32 %294, i32 1 > %297 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %296, <8 x i32> %90, <4 x i32> %97, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %298 = extractelement <4 x float> %297, i32 0 > %299 = call float @llvm.fma.f32(float %53, float %292, float %298) > %300 = fadd float %299, -1.000000e+00 > %301 = fcmp une float %54, 0.000000e+00 > %302 = fdiv float 1.000000e+00, %54 > %temp20.0 = select i1 %301, float %302, float 0x4600000000000000 > %303 = fmul float %temp20.0, %300 > %304 = call float @llvm.AMDGPU.clamp.(float %303, float 0.000000e+00, float 1.000000e+00) > %305 = call float @llvm.fma.f32(float %304, float -2.000000e+00, float 3.000000e+00) > %306 = fmul float %304, %304 > %307 = fmul float %306, %305 > %308 = fadd float %52, 1.000000e+00 > %309 = call float @llvm.fma.f32(float %51, float %308, float %298) > %310 = fadd float %309, -1.000000e+00 > %311 = fcmp une float %52, 0.000000e+00 > %312 = fdiv float 1.000000e+00, %52 > %temp20.1 = select i1 %311, float %312, float 0x4600000000000000 > %313 = fmul float %temp20.1, %310 > %314 = call float @llvm.AMDGPU.clamp.(float %313, float 0.000000e+00, float 1.000000e+00) > %315 = call float @llvm.fma.f32(float %314, float -2.000000e+00, float 3.000000e+00) > %316 = fmul float %314, %314 > %317 = fsub float -0.000000e+00, %315 > %318 = call float @llvm.fma.f32(float %317, float %316, float 1.000000e+00) > %319 = fmul float %318, %307 > %320 = fmul float %319, %43 > %321 = fmul float %320, %61 > %322 = fmul float %291, %321 > %323 = fmul float %47, %47 > %324 = call float @llvm.minnum.f32(float %323, float 1.000000e+00) > %325 = fadd float %46, -1.000000e+00 > %326 = call float @llvm.fma.f32(float %324, float %325, float 1.000000e+00) > %327 = fmul float %326, %269 > %328 = fmul float %326, %270 > %329 = fmul float %326, %271 > %330 = extractelement <2 x i128> %100, i32 1 > %331 = bitcast i128 %330 to <16 x i8> > %332 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %331, i32 0, i32 0) > %333 = extractelement <4 x float> %332, i32 0 > %334 = fmul float %333, %327 > %335 = fmul float %333, %328 > %336 = fmul float %333, %329 > %337 = bitcast float %5 to i32 > %338 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %337, 10 > %339 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %338, float %334, 11 > %340 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %339, float %335, 12 > %341 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %340, float %336, 13 > %342 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %341, float %322, 14 > %343 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %342, 
float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %343 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..45] >DCL CONST[2][0..4095] >DCL TEMP[0..22], LOCAL >DCL ADDR[0] >IMM[0] UINT32 {0, 624, 720, 608} >IMM[1] UINT32 {640, 1, 16, 400} >IMM[2] FLT32 { 255.0020, 2.0000, 1.0000, 0.5000} >IMM[3] INT32 {1, 2, 4, 0} >IMM[4] UINT32 {320, 496, 512, 528} >IMM[5] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1667, 0.2500} >IMM[6] UINT32 {480, 688, 576, 304} >IMM[7] UINT32 {544, 560, 592, 704} >IMM[8] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, CONST[1][39].xyzz, CONST[1][45].yyyy > 1: FMA TEMP[0].xyz, CONST[1][45].xxxx, CONST[1][38].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, CONST[1][45].zzzz, CONST[1][40].xyzz, TEMP[0].xyzz > 3: MUL TEMP[2].xyz, IN[5].zyxx, IMM[2].xxxx > 4: F2I TEMP[3].xyz, TEMP[2].xyzz > 5: SHL TEMP[4].xyz, TEMP[3].xyzz, IMM[3].xxxx > 6: UMAD TEMP[5].xyz, TEMP[3].xyzz, IMM[3].yyyy, IMM[3].xxxx > 7: UMUL TEMP[6].x, TEMP[4].xxxx, IMM[1].zzzz > 8: USHR TEMP[7].x, TEMP[6].xxxx, IMM[3].zzzz > 9: UARL ADDR[0].x, TEMP[7].xxxx > 10: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 11: MUL TEMP[6].x, IN[4].xxxx, TEMP[6].yyyy > 12: MOV TEMP[6].w, TEMP[6].xxxx > 13: UMUL TEMP[7].x, TEMP[4].yyyy, IMM[1].zzzz > 14: USHR TEMP[8].x, TEMP[7].xxxx, IMM[3].zzzz > 15: UARL ADDR[0].x, TEMP[8].xxxx > 16: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 17: MUL TEMP[7].x, IN[4].yyyy, TEMP[7].yyyy > 18: MOV TEMP[7].w, TEMP[7].xxxx > 19: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz > 20: USHR TEMP[9].x, TEMP[8].xxxx, IMM[3].zzzz > 21: UARL ADDR[0].x, TEMP[9].xxxx > 22: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 23: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 24: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 25: UARL ADDR[0].x, 
TEMP[10].xxxx > 26: MOV TEMP[9].w, CONST[2][ADDR[0].x] > 27: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].wwww > 28: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 29: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 30: UARL ADDR[0].x, TEMP[10].xxxx > 31: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 32: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 33: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 34: UARL ADDR[0].x, TEMP[11].xxxx > 35: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 36: FMA TEMP[9].x, TEMP[9].yyyy, TEMP[10].zzzz, -TEMP[8].xxxx > 37: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 38: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 39: UARL ADDR[0].x, TEMP[11].xxxx > 40: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 41: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 42: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 43: UARL ADDR[0].x, TEMP[12].xxxx > 44: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 45: FMA TEMP[8].x, TEMP[10].yyyy, TEMP[11].zzzz, TEMP[8].xxxx > 46: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].xxxx > 47: MUL TEMP[9].x, TEMP[9].xxxx, IN[4].xxxx > 48: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].yyyy > 49: MOV TEMP[6].z, TEMP[9].xxxx > 50: UMUL TEMP[9].x, TEMP[5].yyyy, IMM[1].zzzz > 51: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 52: UARL ADDR[0].x, TEMP[10].xxxx > 53: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 54: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 55: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 56: UARL ADDR[0].x, TEMP[11].xxxx > 57: MOV TEMP[10].w, CONST[2][ADDR[0].x] > 58: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].wwww > 59: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 60: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 61: UARL ADDR[0].x, TEMP[11].xxxx > 62: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 63: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 64: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 65: UARL ADDR[0].x, TEMP[12].xxxx > 66: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 67: FMA TEMP[10].x, TEMP[10].yyyy, TEMP[11].zzzz, -TEMP[9].xxxx > 68: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 69: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 70: UARL ADDR[0].x, TEMP[12].xxxx > 71: MOV TEMP[11].y, CONST[2][ADDR[0].x] > 72: UMUL TEMP[12].x, TEMP[5].yyyy, IMM[1].zzzz > 73: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 74: UARL ADDR[0].x, TEMP[13].xxxx > 75: MOV TEMP[12].z, CONST[2][ADDR[0].x] > 76: FMA TEMP[9].x, TEMP[11].yyyy, TEMP[12].zzzz, TEMP[9].xxxx > 77: MUL TEMP[9].x, TEMP[9].xxxx, IN[4].yyyy > 78: MUL TEMP[9].x, IMM[2].yyyy, TEMP[9].xxxx > 79: MOV TEMP[9].y, TEMP[9].xxxx > 80: MUL TEMP[10].x, TEMP[10].xxxx, IN[4].yyyy > 81: MUL TEMP[10].x, IMM[2].yyyy, TEMP[10].xxxx > 82: MOV TEMP[7].z, TEMP[10].xxxx > 83: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 84: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 85: UARL ADDR[0].x, TEMP[11].xxxx > 86: MOV TEMP[10].yz, CONST[2][ADDR[0].x] > 87: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 88: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 89: UARL ADDR[0].x, TEMP[12].xxxx > 90: MOV TEMP[11].xw, CONST[2][ADDR[0].x] > 91: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww > 92: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 93: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 94: UARL ADDR[0].x, TEMP[12].xxxx > 95: MOV TEMP[11].x, CONST[2][ADDR[0].x] > 96: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz > 97: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 98: UARL ADDR[0].x, TEMP[13].xxxx > 99: MOV TEMP[12].y, CONST[2][ADDR[0].x] >100: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >101: MUL TEMP[11].x, TEMP[11].xxxx, IN[4].xxxx >102: MUL TEMP[6].x, IMM[2].yyyy, TEMP[11].xxxx >103: UMUL TEMP[11].x, 
TEMP[5].xxxx, IMM[1].zzzz >104: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz >105: UARL ADDR[0].x, TEMP[12].xxxx >106: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >107: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >108: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz >109: UARL ADDR[0].x, TEMP[13].xxxx >110: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >111: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >112: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >113: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[2].yyyy, IMM[2].zzzz >114: MUL TEMP[13].x, IN[4].xxxx, TEMP[12].yyyy >115: MOV TEMP[6].y, TEMP[13].xxxx >116: UMUL TEMP[13].x, TEMP[5].yyyy, IMM[1].zzzz >117: USHR TEMP[14].x, TEMP[13].xxxx, IMM[3].zzzz >118: UARL ADDR[0].x, TEMP[14].xxxx >119: MOV TEMP[13].yz, CONST[2][ADDR[0].x] >120: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >121: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >122: UARL ADDR[0].x, TEMP[15].xxxx >123: MOV TEMP[14].xw, CONST[2][ADDR[0].x] >124: MUL TEMP[13].xyz, TEMP[13].zzyy, TEMP[14].wxww >125: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >126: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >127: UARL ADDR[0].x, TEMP[15].xxxx >128: MOV TEMP[14].x, CONST[2][ADDR[0].x] >129: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >130: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >131: UARL ADDR[0].x, TEMP[16].xxxx >132: MOV TEMP[15].y, CONST[2][ADDR[0].x] >133: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].yyyy, TEMP[13].xxxx >134: MUL TEMP[14].x, TEMP[14].xxxx, IN[4].yyyy >135: MUL TEMP[7].x, IMM[2].yyyy, TEMP[14].xxxx >136: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >137: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >138: UARL ADDR[0].x, TEMP[15].xxxx >139: MOV TEMP[14].xyz, CONST[2][ADDR[0].x] >140: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >141: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >142: UARL ADDR[0].x, TEMP[16].xxxx >143: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >144: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >145: ADD TEMP[14].xyz, TEMP[14].zzyy, TEMP[14].yxxx >146: FMA TEMP[15].xyz, -TEMP[14].xyzz, IMM[2].yyyy, IMM[2].zzzz >147: MUL TEMP[16].x, IN[4].yyyy, TEMP[15].yyyy >148: MOV TEMP[7].y, TEMP[16].xxxx >149: ADD TEMP[6], TEMP[6], TEMP[7] >150: UMUL TEMP[16].x, TEMP[4].zzzz, IMM[1].zzzz >151: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >152: UARL ADDR[0].x, TEMP[17].xxxx >153: MOV TEMP[16].y, CONST[2][ADDR[0].x] >154: MUL TEMP[16].x, IN[4].zzzz, TEMP[16].yyyy >155: MOV TEMP[7].w, TEMP[16].xxxx >156: UMUL TEMP[16].x, TEMP[5].zzzz, IMM[1].zzzz >157: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >158: UARL ADDR[0].x, TEMP[17].xxxx >159: MOV TEMP[16].x, CONST[2][ADDR[0].x] >160: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >161: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >162: UARL ADDR[0].x, TEMP[18].xxxx >163: MOV TEMP[17].w, CONST[2][ADDR[0].x] >164: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[17].wwww >165: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >166: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >167: UARL ADDR[0].x, TEMP[18].xxxx >168: MOV TEMP[17].y, CONST[2][ADDR[0].x] >169: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >170: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >171: UARL ADDR[0].x, TEMP[19].xxxx >172: MOV TEMP[18].z, CONST[2][ADDR[0].x] >173: FMA TEMP[17].x, TEMP[17].yyyy, TEMP[18].zzzz, -TEMP[16].xxxx >174: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >175: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >176: UARL ADDR[0].x, TEMP[19].xxxx >177: MOV TEMP[18].y, CONST[2][ADDR[0].x] >178: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >179: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >180: UARL ADDR[0].x, 
TEMP[20].xxxx >181: MOV TEMP[19].z, CONST[2][ADDR[0].x] >182: FMA TEMP[16].x, TEMP[18].yyyy, TEMP[19].zzzz, TEMP[16].xxxx >183: MUL TEMP[16].x, TEMP[16].xxxx, IN[4].zzzz >184: MUL TEMP[16].x, IMM[2].yyyy, TEMP[16].xxxx >185: MOV TEMP[16].y, TEMP[16].xxxx >186: MUL TEMP[17].x, TEMP[17].xxxx, IN[4].zzzz >187: MUL TEMP[17].x, IMM[2].yyyy, TEMP[17].xxxx >188: MOV TEMP[7].z, TEMP[17].xxxx >189: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >190: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >191: UARL ADDR[0].x, TEMP[18].xxxx >192: MOV TEMP[17].yz, CONST[2][ADDR[0].x] >193: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >194: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >195: UARL ADDR[0].x, TEMP[19].xxxx >196: MOV TEMP[18].xw, CONST[2][ADDR[0].x] >197: MUL TEMP[17].xyz, TEMP[17].zzyy, TEMP[18].wxww >198: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >199: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >200: UARL ADDR[0].x, TEMP[19].xxxx >201: MOV TEMP[18].x, CONST[2][ADDR[0].x] >202: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >203: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >204: UARL ADDR[0].x, TEMP[20].xxxx >205: MOV TEMP[19].y, CONST[2][ADDR[0].x] >206: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].yyyy, TEMP[17].xxxx >207: MUL TEMP[18].x, TEMP[18].xxxx, IN[4].zzzz >208: MUL TEMP[7].x, IMM[2].yyyy, TEMP[18].xxxx >209: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >210: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >211: UARL ADDR[0].x, TEMP[19].xxxx >212: MOV TEMP[18].xyz, CONST[2][ADDR[0].x] >213: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >214: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >215: UARL ADDR[0].x, TEMP[20].xxxx >216: MOV TEMP[19].xyz, CONST[2][ADDR[0].x] >217: MUL TEMP[18].xyz, TEMP[18].xyzz, TEMP[19].xyzz >218: ADD TEMP[18].xyz, TEMP[18].zzyy, TEMP[18].yxxx >219: FMA TEMP[19].xyz, -TEMP[18].xyzz, IMM[2].yyyy, IMM[2].zzzz >220: MUL TEMP[20].x, IN[4].zzzz, TEMP[19].yyyy >221: MOV TEMP[7].y, TEMP[20].xxxx >222: ADD TEMP[6], TEMP[6], TEMP[7] >223: DP3 TEMP[20].x, TEMP[6].xyzz, IN[1].xyzz >224: MOV TEMP[7].y, TEMP[20].xxxx >225: UMUL TEMP[20].x, TEMP[5].xxxx, IMM[1].zzzz >226: USHR TEMP[21].x, TEMP[20].xxxx, IMM[3].zzzz >227: UARL ADDR[0].x, TEMP[21].xxxx >228: MOV TEMP[20].x, CONST[2][ADDR[0].x] >229: UMUL TEMP[21].x, TEMP[5].xxxx, IMM[1].zzzz >230: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >231: UARL ADDR[0].x, TEMP[22].xxxx >232: MOV TEMP[21].z, CONST[2][ADDR[0].x] >233: FMA TEMP[20].x, TEMP[20].xxxx, TEMP[21].zzzz, -TEMP[10].zzzz >234: MUL TEMP[20].x, TEMP[20].xxxx, IN[4].xxxx >235: MUL TEMP[20].x, IMM[2].yyyy, TEMP[20].xxxx >236: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].yyyy >237: MOV TEMP[20].y, TEMP[8].xxxx >238: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >239: USHR TEMP[21].x, TEMP[8].xxxx, IMM[3].zzzz >240: UARL ADDR[0].x, TEMP[21].xxxx >241: MOV TEMP[8].x, CONST[2][ADDR[0].x] >242: UMUL TEMP[21].x, TEMP[5].yyyy, IMM[1].zzzz >243: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >244: UARL ADDR[0].x, TEMP[22].xxxx >245: MOV TEMP[21].z, CONST[2][ADDR[0].x] >246: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[21].zzzz, -TEMP[13].zzzz >247: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].yyyy >248: MUL TEMP[9].x, IMM[2].yyyy, TEMP[8].xxxx >249: MUL TEMP[8].x, IN[4].xxxx, TEMP[12].zzzz >250: MOV TEMP[20].z, TEMP[8].xxxx >251: MUL TEMP[11].x, IN[4].xxxx, TEMP[12].xxxx >252: MUL TEMP[8].x, IN[4].yyyy, TEMP[15].zzzz >253: MOV TEMP[9].z, TEMP[8].xxxx >254: MUL TEMP[14].x, IN[4].yyyy, TEMP[15].xxxx >255: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >256: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >257: UARL ADDR[0].x, TEMP[12].xxxx >258: MOV 
TEMP[8].z, CONST[2][ADDR[0].x] >259: MUL TEMP[8].x, IN[4].xxxx, TEMP[8].zzzz >260: MOV TEMP[20].w, TEMP[8].xxxx >261: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >262: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >263: UARL ADDR[0].x, TEMP[12].xxxx >264: MOV TEMP[8].z, CONST[2][ADDR[0].x] >265: MUL TEMP[8].x, IN[4].yyyy, TEMP[8].zzzz >266: MOV TEMP[9].w, TEMP[8].xxxx >267: ADD TEMP[9], TEMP[9], TEMP[20] >268: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >269: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >270: UARL ADDR[0].x, TEMP[12].xxxx >271: MOV TEMP[8].x, CONST[2][ADDR[0].x] >272: UMUL TEMP[12].x, TEMP[5].zzzz, IMM[1].zzzz >273: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >274: UARL ADDR[0].x, TEMP[15].xxxx >275: MOV TEMP[12].z, CONST[2][ADDR[0].x] >276: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].zzzz, -TEMP[17].zzzz >277: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].zzzz >278: MUL TEMP[16].x, IMM[2].yyyy, TEMP[8].xxxx >279: MUL TEMP[8].x, IN[4].zzzz, TEMP[19].zzzz >280: MOV TEMP[16].z, TEMP[8].xxxx >281: MUL TEMP[18].x, IN[4].zzzz, TEMP[19].xxxx >282: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >283: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >284: UARL ADDR[0].x, TEMP[12].xxxx >285: MOV TEMP[8].z, CONST[2][ADDR[0].x] >286: MUL TEMP[8].x, IN[4].zzzz, TEMP[8].zzzz >287: MOV TEMP[16].w, TEMP[8].xxxx >288: ADD TEMP[9], TEMP[9], TEMP[16] >289: DP3 TEMP[8].x, TEMP[9].xyzz, IN[1].xyzz >290: MOV TEMP[7].z, TEMP[8].xxxx >291: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >292: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >293: UARL ADDR[0].x, TEMP[12].xxxx >294: MOV TEMP[8].x, CONST[2][ADDR[0].x] >295: MUL TEMP[8].x, IN[4].xxxx, TEMP[8].xxxx >296: MOV TEMP[11].w, TEMP[8].xxxx >297: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >298: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >299: UARL ADDR[0].x, TEMP[12].xxxx >300: MOV TEMP[8].x, CONST[2][ADDR[0].x] >301: MUL TEMP[8].x, IN[4].yyyy, TEMP[8].xxxx >302: MOV TEMP[14].w, TEMP[8].xxxx >303: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >304: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >305: UARL ADDR[0].x, TEMP[12].xxxx >306: MOV TEMP[8].x, CONST[2][ADDR[0].x] >307: MUL TEMP[8].x, IN[4].zzzz, TEMP[8].xxxx >308: MOV TEMP[18].w, TEMP[8].xxxx >309: ADD TEMP[2].x, TEMP[10].zzzz, TEMP[10].yyyy >310: MUL TEMP[2].x, TEMP[2].xxxx, IN[4].xxxx >311: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >312: MOV TEMP[11].z, TEMP[8].xxxx >313: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz >314: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >315: UARL ADDR[0].x, TEMP[12].xxxx >316: MOV TEMP[8].x, CONST[2][ADDR[0].x] >317: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >318: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >319: UARL ADDR[0].x, TEMP[15].xxxx >320: MOV TEMP[12].y, CONST[2][ADDR[0].x] >321: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].yyyy, -TEMP[10].xxxx >322: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].xxxx >323: MUL TEMP[8].x, IMM[2].yyyy, TEMP[8].xxxx >324: MOV TEMP[11].y, TEMP[8].xxxx >325: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >326: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >327: UARL ADDR[0].x, TEMP[10].xxxx >328: MOV TEMP[8].x, CONST[2][ADDR[0].x] >329: UMUL TEMP[10].x, TEMP[5].zzzz, IMM[1].zzzz >330: USHR TEMP[12].x, TEMP[10].xxxx, IMM[3].zzzz >331: UARL ADDR[0].x, TEMP[12].xxxx >332: MOV TEMP[10].y, CONST[2][ADDR[0].x] >333: FMA TEMP[2].x, TEMP[8].xxxx, TEMP[10].yyyy, -TEMP[17].xxxx >334: ADD TEMP[8].x, TEMP[17].zzzz, TEMP[17].yyyy >335: MOV TEMP[2].y, TEMP[8].xxxx >336: MUL TEMP[2].xy, TEMP[2].xyyy, IN[4].zzzz >337: MUL TEMP[8].xy, IMM[2].yyyy, TEMP[2].xyyy >338: MOV TEMP[18].yz, TEMP[8].yxyy >339: ADD TEMP[2].x, 
TEMP[13].zzzz, TEMP[13].yyyy >340: MUL TEMP[2].x, TEMP[2].xxxx, IN[4].yyyy >341: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >342: MOV TEMP[14].z, TEMP[8].xxxx >343: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >344: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >345: UARL ADDR[0].x, TEMP[10].xxxx >346: MOV TEMP[8].x, CONST[2][ADDR[0].x] >347: UMUL TEMP[5].x, TEMP[5].yyyy, IMM[1].zzzz >348: USHR TEMP[10].x, TEMP[5].xxxx, IMM[3].zzzz >349: UARL ADDR[0].x, TEMP[10].xxxx >350: MOV TEMP[5].y, CONST[2][ADDR[0].x] >351: FMA TEMP[5].x, TEMP[8].xxxx, TEMP[5].yyyy, -TEMP[13].xxxx >352: MUL TEMP[5].x, TEMP[5].xxxx, IN[4].yyyy >353: MUL TEMP[5].x, IMM[2].yyyy, TEMP[5].xxxx >354: MOV TEMP[14].y, TEMP[5].xxxx >355: ADD TEMP[2], TEMP[11], TEMP[14] >356: ADD TEMP[2], TEMP[18], TEMP[2] >357: DP3 TEMP[7].x, TEMP[2].xyzz, IN[1].xyzz >358: MOV TEMP[5].xyz, TEMP[7].xyzx >359: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[7].xyzz >360: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz >361: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx >362: MUL TEMP[7].x, TEMP[7].xxxx, CONST[1][25].yyyy >363: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].wwww >364: MOV TEMP[3].xyz, IN[0].xyzx >365: MOV TEMP[3].w, IMM[2].zzzz >366: DP4 TEMP[6].x, TEMP[6], TEMP[3] >367: MOV TEMP[4].y, TEMP[6].xxxx >368: DP4 TEMP[6].x, TEMP[9], TEMP[3] >369: MOV TEMP[4].z, TEMP[6].xxxx >370: DP4 TEMP[4].x, TEMP[2], TEMP[3] >371: DP3 TEMP[6].x, IMM[2].zzzz, TEMP[4].xyzz >372: DP3 TEMP[8].x, CONST[1][38].xyzz, CONST[1][38].xyzz >373: SQRT TEMP[8].x, TEMP[8].xxxx >374: MUL TEMP[9].x, TEMP[8].xxxx, CONST[1][20].wwww >375: FSNE TEMP[10].x, TEMP[9].xxxx, IMM[5].xxxx >376: UIF TEMP[10].xxxx :0 >377: RCP TEMP[9].x, TEMP[9].xxxx >378: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[9].xxxx >379: ELSE :0 >380: SSG TEMP[6].x, TEMP[6].xxxx >381: MUL TEMP[9].x, IMM[5].yyyy, TEMP[6].xxxx >382: ENDIF >383: ADD TEMP[3], -CONST[1][31], CONST[1][32] >384: MUL TEMP[6].x, CONST[1][20].zzzz, CONST[1][45].wwww >385: MUL TEMP[10].x, TEMP[6].xxxx, IMM[5].zzzz >386: MOV_SAT TEMP[10].x, TEMP[10].xxxx >387: FMA TEMP[10], TEMP[10].xxxx, TEMP[3], CONST[1][31] >388: FMA TEMP[9].xy, CONST[1][33].xxxx, TEMP[10].ywww, TEMP[9].xxxx >389: MUL TEMP[3].xy, TEMP[8].xxxx, TEMP[10].xzzz >390: FMA TEMP[8].xy, -IN[3].yzzz, CONST[1][30].xyyy, IMM[2].zzzz >391: ADD TEMP[2].xy, TEMP[9].xyyy, TEMP[8].xyyy >392: SIN TEMP[8].x, TEMP[2].xxxx >393: SIN TEMP[8].y, TEMP[2].yyyy >394: MUL TEMP[2].xy, TEMP[3].xyyy, TEMP[8].xyyy >395: FMA TEMP[2].x, TEMP[6].xxxx, CONST[1][20].yyyy, TEMP[2].xxxx >396: FMA TEMP[3].x, TEMP[2].yyyy, IMM[5].wwww, TEMP[2].xxxx >397: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[3].xxxx >398: FMA TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xxxx, TEMP[4].xyzz >399: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz >400: RSQ TEMP[3].x, TEMP[3].xxxx >401: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[1].xyzz >402: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz >403: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][43].xyzz >404: SQRT TEMP[1].x, TEMP[1].xxxx >405: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >406: MOV TEMP[0].w, IMM[2].zzzz >407: DP4 TEMP[1].x, CONST[1][36], TEMP[0] >408: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][19].yyyy >409: MOV TEMP[2].z, TEMP[1].xxxx >410: DP4 TEMP[2].x, CONST[1][34], TEMP[0] >411: DP4 TEMP[1].x, CONST[1][35], TEMP[0] >412: MOV TEMP[2].y, TEMP[1].xxxx >413: DP4 TEMP[0].x, CONST[1][37], TEMP[0] >414: MOV TEMP[2].w, TEMP[0].xxxx >415: MOV TEMP[0].xy, IN[2].xyxx >416: MOV OUT[4], IN[3] >417: MOV OUT[2], TEMP[0] >418: MOV OUT[3], TEMP[3] >419: MOV OUT[1], TEMP[5] >420: MOV OUT[0], TEMP[2] >421: END >radeonsi: Compiling shader 170 >TGSI shader LLVM IR: > 
>; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 308) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 324) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 328) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 332) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 404) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 480) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 484) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 496) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 500) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 504) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 508) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 512) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 516) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 520) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 524) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 528) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 544) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 548) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 552) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 556) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 560) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 564) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 568) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 572) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 576) > %46 = call float @llvm.SI.load.const(<16 x i8> %20, i32 580) > %47 = call float @llvm.SI.load.const(<16 x i8> %20, i32 584) > %48 = call float @llvm.SI.load.const(<16 x i8> %20, i32 588) > %49 = call float @llvm.SI.load.const(<16 x i8> %20, i32 592) > %50 = call float @llvm.SI.load.const(<16 x i8> %20, i32 596) > %51 = call float @llvm.SI.load.const(<16 x i8> %20, i32 600) > %52 = call float @llvm.SI.load.const(<16 x i8> %20, i32 604) > %53 = call float @llvm.SI.load.const(<16 x i8> %20, i32 608) > %54 = call float @llvm.SI.load.const(<16 x i8> %20, i32 612) > %55 = call float @llvm.SI.load.const(<16 x i8> %20, i32 616) > %56 = call float @llvm.SI.load.const(<16 x i8> %20, i32 624) > %57 = call float @llvm.SI.load.const(<16 x i8> %20, i32 628) > %58 = call float @llvm.SI.load.const(<16 x i8> %20, i32 632) > %59 = call float @llvm.SI.load.const(<16 x i8> %20, i32 640) > %60 = call float @llvm.SI.load.const(<16 x i8> %20, i32 644) > %61 = call float @llvm.SI.load.const(<16 x i8> %20, i32 648) > %62 = call float @llvm.SI.load.const(<16 x i8> %20, i32 688) > %63 = call float @llvm.SI.load.const(<16 x i8> %20, i32 692) > %64 = call float @llvm.SI.load.const(<16 x i8> %20, i32 696) > %65 = call float @llvm.SI.load.const(<16 x i8> %20, i32 720) > %66 = call float @llvm.SI.load.const(<16 x i8> %20, i32 724) > %67 = call float @llvm.SI.load.const(<16 x 
i8> %20, i32 728) > %68 = call float @llvm.SI.load.const(<16 x i8> %20, i32 732) > %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %13) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %14) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %15) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %16) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 > %97 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %17) > %98 = extractelement <4 x float> %97, i32 0 > %99 = extractelement <4 x float> %97, i32 1 > %100 = extractelement <4 x float> %97, i32 2 > %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 > %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %18) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = fmul float %56, %66 > %108 = fmul float %57, %66 > %109 = fmul float %58, %66 > %110 = call float @llvm.fma.f32(float %65, float %53, float %107) > %111 = call float @llvm.fma.f32(float %65, float %54, float %108) > %112 = call float @llvm.fma.f32(float %65, float %55, float %109) > %113 = call float @llvm.fma.f32(float %67, float %59, float %110) > %114 = call float @llvm.fma.f32(float %67, float %60, float %111) > %115 = call float @llvm.fma.f32(float %67, float %61, float %112) > %116 = fmul float %106, 0x406FE01000000000 > %117 = fmul float %105, 0x406FE01000000000 > %118 = fmul float %104, 0x406FE01000000000 > %119 = fptosi float %116 to i32 > %120 = fptosi float %117 to i32 > %121 = fptosi float %118 to i32 > %122 = shl i32 %119, 1 > %123 = or i32 %122, 1 > %124 = shl i32 %120, 1 > %125 = or i32 %124, 1 > %126 = shl i32 %121, 1 > %127 = or i32 %126, 1 > %128 = shl i32 %119, 5 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %129) > %131 = fmul float %98, %130 > %132 = shl i32 
%120, 5 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %133) > %135 = fmul float %99, %134 > %136 = shl i32 %123, 4 > %137 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %136) > %138 = shl i32 %123, 4 > %139 = or i32 %138, 12 > %140 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %139) > %141 = fmul float %137, %140 > %142 = shl i32 %123, 4 > %143 = or i32 %142, 4 > %144 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %143) > %145 = shl i32 %123, 4 > %146 = or i32 %145, 8 > %147 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %146) > %148 = fsub float -0.000000e+00, %141 > %149 = call float @llvm.fma.f32(float %144, float %147, float %148) > %150 = shl i32 %123, 4 > %151 = or i32 %150, 4 > %152 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %151) > %153 = shl i32 %123, 4 > %154 = or i32 %153, 8 > %155 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %154) > %156 = call float @llvm.fma.f32(float %152, float %155, float %141) > %157 = fmul float %156, %98 > %158 = fmul float %149, %98 > %159 = fmul float %158, 2.000000e+00 > %160 = shl i32 %125, 4 > %161 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %160) > %162 = shl i32 %125, 4 > %163 = or i32 %162, 12 > %164 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %163) > %165 = fmul float %161, %164 > %166 = shl i32 %125, 4 > %167 = or i32 %166, 4 > %168 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %167) > %169 = shl i32 %125, 4 > %170 = or i32 %169, 8 > %171 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %170) > %172 = fsub float -0.000000e+00, %165 > %173 = call float @llvm.fma.f32(float %168, float %171, float %172) > %174 = shl i32 %125, 4 > %175 = or i32 %174, 4 > %176 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %175) > %177 = shl i32 %125, 4 > %178 = or i32 %177, 8 > %179 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %178) > %180 = call float @llvm.fma.f32(float %176, float %179, float %165) > %181 = fmul float %180, %99 > %182 = fmul float %181, 2.000000e+00 > %183 = fmul float %173, %99 > %184 = fmul float %183, 2.000000e+00 > %185 = shl i32 %123, 4 > %186 = or i32 %185, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %186) > %188 = shl i32 %123, 4 > %189 = or i32 %188, 8 > %190 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %189) > %191 = shl i32 %123, 4 > %192 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %191) > %193 = shl i32 %123, 4 > %194 = or i32 %193, 12 > %195 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %194) > %196 = fmul float %190, %195 > %197 = fmul float %190, %192 > %198 = fmul float %187, %195 > %199 = shl i32 %123, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %199) > %201 = shl i32 %123, 4 > %202 = or i32 %201, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %202) > %204 = call float @llvm.fma.f32(float %200, float %203, float %196) > %205 = fmul float %204, %98 > %206 = fmul float %205, 2.000000e+00 > %207 = shl i32 %123, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %207) > %209 = shl i32 %123, 4 > %210 = or i32 %209, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %210) > %212 = shl i32 %123, 4 > %213 = or i32 %212, 8 > %214 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %213) > %215 = shl i32 %123, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %215) > %217 = shl i32 %123, 4 > %218 = or i32 %217, 4 > %219 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %218) > %220 = shl i32 %123, 4 > %221 = or i32 %220, 8 > %222 
= call float @llvm.SI.load.const(<16 x i8> %70, i32 %221) > %223 = fmul float %208, %216 > %224 = fmul float %211, %219 > %225 = fmul float %214, %222 > %226 = fadd float %225, %224 > %227 = fadd float %225, %223 > %228 = fadd float %224, %223 > %229 = fsub float -0.000000e+00, %226 > %230 = call float @llvm.fma.f32(float %229, float 2.000000e+00, float 1.000000e+00) > %231 = fsub float -0.000000e+00, %227 > %232 = call float @llvm.fma.f32(float %231, float 2.000000e+00, float 1.000000e+00) > %233 = fsub float -0.000000e+00, %228 > %234 = call float @llvm.fma.f32(float %233, float 2.000000e+00, float 1.000000e+00) > %235 = fmul float %98, %232 > %236 = shl i32 %125, 4 > %237 = or i32 %236, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %237) > %239 = shl i32 %125, 4 > %240 = or i32 %239, 8 > %241 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %240) > %242 = shl i32 %125, 4 > %243 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %242) > %244 = shl i32 %125, 4 > %245 = or i32 %244, 12 > %246 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %245) > %247 = fmul float %241, %246 > %248 = fmul float %241, %243 > %249 = fmul float %238, %246 > %250 = shl i32 %125, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %250) > %252 = shl i32 %125, 4 > %253 = or i32 %252, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %253) > %255 = call float @llvm.fma.f32(float %251, float %254, float %247) > %256 = fmul float %255, %99 > %257 = fmul float %256, 2.000000e+00 > %258 = shl i32 %125, 4 > %259 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %258) > %260 = shl i32 %125, 4 > %261 = or i32 %260, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %261) > %263 = shl i32 %125, 4 > %264 = or i32 %263, 8 > %265 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %264) > %266 = shl i32 %125, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %266) > %268 = shl i32 %125, 4 > %269 = or i32 %268, 4 > %270 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %269) > %271 = shl i32 %125, 4 > %272 = or i32 %271, 8 > %273 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %272) > %274 = fmul float %259, %267 > %275 = fmul float %262, %270 > %276 = fmul float %265, %273 > %277 = fadd float %276, %275 > %278 = fadd float %276, %274 > %279 = fadd float %275, %274 > %280 = fsub float -0.000000e+00, %277 > %281 = call float @llvm.fma.f32(float %280, float 2.000000e+00, float 1.000000e+00) > %282 = fsub float -0.000000e+00, %278 > %283 = call float @llvm.fma.f32(float %282, float 2.000000e+00, float 1.000000e+00) > %284 = fsub float -0.000000e+00, %279 > %285 = call float @llvm.fma.f32(float %284, float 2.000000e+00, float 1.000000e+00) > %286 = fmul float %99, %283 > %287 = fadd float %206, %257 > %288 = fadd float %235, %286 > %289 = fadd float %159, %184 > %290 = fadd float %131, %135 > %291 = shl i32 %121, 5 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %292) > %294 = fmul float %100, %293 > %295 = shl i32 %127, 4 > %296 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %295) > %297 = shl i32 %127, 4 > %298 = or i32 %297, 12 > %299 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %298) > %300 = fmul float %296, %299 > %301 = shl i32 %127, 4 > %302 = or i32 %301, 4 > %303 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %302) > %304 = shl i32 %127, 4 > %305 = or i32 %304, 8 > %306 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %305) > %307 = fsub float -0.000000e+00, %300 > %308 = call float 
@llvm.fma.f32(float %303, float %306, float %307) > %309 = shl i32 %127, 4 > %310 = or i32 %309, 4 > %311 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %310) > %312 = shl i32 %127, 4 > %313 = or i32 %312, 8 > %314 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %313) > %315 = call float @llvm.fma.f32(float %311, float %314, float %300) > %316 = fmul float %315, %100 > %317 = fmul float %316, 2.000000e+00 > %318 = fmul float %308, %100 > %319 = fmul float %318, 2.000000e+00 > %320 = shl i32 %127, 4 > %321 = or i32 %320, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %321) > %323 = shl i32 %127, 4 > %324 = or i32 %323, 8 > %325 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %324) > %326 = shl i32 %127, 4 > %327 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %326) > %328 = shl i32 %127, 4 > %329 = or i32 %328, 12 > %330 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %329) > %331 = fmul float %325, %330 > %332 = fmul float %325, %327 > %333 = fmul float %322, %330 > %334 = shl i32 %127, 4 > %335 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %334) > %336 = shl i32 %127, 4 > %337 = or i32 %336, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %337) > %339 = call float @llvm.fma.f32(float %335, float %338, float %331) > %340 = fmul float %339, %100 > %341 = fmul float %340, 2.000000e+00 > %342 = shl i32 %127, 4 > %343 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %342) > %344 = shl i32 %127, 4 > %345 = or i32 %344, 4 > %346 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %345) > %347 = shl i32 %127, 4 > %348 = or i32 %347, 8 > %349 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %348) > %350 = shl i32 %127, 4 > %351 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %350) > %352 = shl i32 %127, 4 > %353 = or i32 %352, 4 > %354 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %353) > %355 = shl i32 %127, 4 > %356 = or i32 %355, 8 > %357 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %356) > %358 = fmul float %343, %351 > %359 = fmul float %346, %354 > %360 = fmul float %349, %357 > %361 = fadd float %360, %359 > %362 = fadd float %360, %358 > %363 = fadd float %359, %358 > %364 = fsub float -0.000000e+00, %361 > %365 = call float @llvm.fma.f32(float %364, float 2.000000e+00, float 1.000000e+00) > %366 = fsub float -0.000000e+00, %362 > %367 = call float @llvm.fma.f32(float %366, float 2.000000e+00, float 1.000000e+00) > %368 = fsub float -0.000000e+00, %363 > %369 = call float @llvm.fma.f32(float %368, float 2.000000e+00, float 1.000000e+00) > %370 = fmul float %100, %367 > %371 = fadd float %287, %341 > %372 = fadd float %288, %370 > %373 = fadd float %289, %319 > %374 = fadd float %290, %294 > %375 = fmul float %371, %80 > %376 = fmul float %372, %81 > %377 = fadd float %376, %375 > %378 = fmul float %373, %82 > %379 = fadd float %377, %378 > %380 = shl i32 %123, 4 > %381 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %380) > %382 = shl i32 %123, 4 > %383 = or i32 %382, 8 > %384 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %383) > %385 = fsub float -0.000000e+00, %198 > %386 = call float @llvm.fma.f32(float %381, float %384, float %385) > %387 = fmul float %386, %98 > %388 = fmul float %387, 2.000000e+00 > %389 = fmul float %157, 2.000000e+00 > %390 = shl i32 %125, 4 > %391 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %390) > %392 = shl i32 %125, 4 > %393 = or i32 %392, 8 > %394 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %393) > %395 = fsub float -0.000000e+00, %249 > %396 = call float 
@llvm.fma.f32(float %391, float %394, float %395) > %397 = fmul float %396, %99 > %398 = fmul float %397, 2.000000e+00 > %399 = fmul float %98, %234 > %400 = fmul float %98, %230 > %401 = fmul float %99, %285 > %402 = fmul float %99, %281 > %403 = shl i32 %119, 5 > %404 = or i32 %403, 8 > %405 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %404) > %406 = fmul float %98, %405 > %407 = shl i32 %120, 5 > %408 = or i32 %407, 8 > %409 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %408) > %410 = fmul float %99, %409 > %411 = fadd float %398, %388 > %412 = fadd float %182, %389 > %413 = fadd float %401, %399 > %414 = fadd float %410, %406 > %415 = shl i32 %127, 4 > %416 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %415) > %417 = shl i32 %127, 4 > %418 = or i32 %417, 8 > %419 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %418) > %420 = fsub float -0.000000e+00, %333 > %421 = call float @llvm.fma.f32(float %416, float %419, float %420) > %422 = fmul float %421, %100 > %423 = fmul float %422, 2.000000e+00 > %424 = fmul float %100, %369 > %425 = fmul float %100, %365 > %426 = shl i32 %121, 5 > %427 = or i32 %426, 8 > %428 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %427) > %429 = fmul float %100, %428 > %430 = fadd float %411, %423 > %431 = fadd float %412, %317 > %432 = fadd float %413, %424 > %433 = fadd float %414, %429 > %434 = fmul float %430, %80 > %435 = fmul float %431, %81 > %436 = fadd float %435, %434 > %437 = fmul float %432, %82 > %438 = fadd float %436, %437 > %439 = shl i32 %119, 5 > %440 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %439) > %441 = fmul float %98, %440 > %442 = shl i32 %120, 5 > %443 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %442) > %444 = fmul float %99, %443 > %445 = shl i32 %121, 5 > %446 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %445) > %447 = fmul float %100, %446 > %448 = fadd float %198, %197 > %449 = fmul float %448, %98 > %450 = fmul float %449, 2.000000e+00 > %451 = shl i32 %123, 4 > %452 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %451) > %453 = shl i32 %123, 4 > %454 = or i32 %453, 4 > %455 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %454) > %456 = fsub float -0.000000e+00, %196 > %457 = call float @llvm.fma.f32(float %452, float %455, float %456) > %458 = fmul float %457, %98 > %459 = fmul float %458, 2.000000e+00 > %460 = shl i32 %127, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %460) > %462 = shl i32 %127, 4 > %463 = or i32 %462, 4 > %464 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %463) > %465 = fsub float -0.000000e+00, %331 > %466 = call float @llvm.fma.f32(float %461, float %464, float %465) > %467 = fadd float %333, %332 > %468 = fmul float %466, %100 > %469 = fmul float %467, %100 > %470 = fmul float %468, 2.000000e+00 > %471 = fmul float %469, 2.000000e+00 > %472 = fadd float %249, %248 > %473 = fmul float %472, %99 > %474 = fmul float %473, 2.000000e+00 > %475 = shl i32 %125, 4 > %476 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %475) > %477 = shl i32 %125, 4 > %478 = or i32 %477, 4 > %479 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %478) > %480 = fsub float -0.000000e+00, %247 > %481 = call float @llvm.fma.f32(float %476, float %479, float %480) > %482 = fmul float %481, %99 > %483 = fmul float %482, 2.000000e+00 > %484 = fadd float %400, %402 > %485 = fadd float %459, %483 > %486 = fadd float %450, %474 > %487 = fadd float %441, %444 > %488 = fadd float %425, %484 > %489 = fadd float %470, %485 > %490 = fadd float %471, %486 > %491 
= fadd float %447, %487 > %492 = fmul float %488, %80 > %493 = fmul float %489, %81 > %494 = fadd float %493, %492 > %495 = fmul float %490, %82 > %496 = fadd float %494, %495 > %497 = fmul float %113, %496 > %498 = fmul float %114, %379 > %499 = fadd float %498, %497 > %500 = fmul float %115, %438 > %501 = fadd float %499, %500 > %502 = fadd float %501, 1.000000e+00 > %503 = fmul float %502, %91 > %504 = fmul float %503, %25 > %505 = fmul float %504, 5.000000e-01 > %506 = fmul float %371, %74 > %507 = fmul float %372, %75 > %508 = fadd float %506, %507 > %509 = fmul float %373, %76 > %510 = fadd float %508, %509 > %511 = fadd float %510, %374 > %512 = fmul float %430, %74 > %513 = fmul float %431, %75 > %514 = fadd float %512, %513 > %515 = fmul float %432, %76 > %516 = fadd float %514, %515 > %517 = fadd float %516, %433 > %518 = fmul float %488, %74 > %519 = fmul float %489, %75 > %520 = fadd float %518, %519 > %521 = fmul float %490, %76 > %522 = fadd float %520, %521 > %523 = fadd float %522, %491 > %524 = fadd float %511, %523 > %525 = fadd float %524, %517 > %526 = fmul float %53, %53 > %527 = fmul float %54, %54 > %528 = fadd float %527, %526 > %529 = fmul float %55, %55 > %530 = fadd float %528, %529 > %531 = call float @llvm.sqrt.f32(float %530) > %532 = fmul float %531, %24 > %533 = fcmp une float %532, 0.000000e+00 > br i1 %533, label %IF, label %ELSE > >IF: ; preds = %main_body > %534 = fdiv float 1.000000e+00, %532 > %535 = fmul float %525, %534 > br label %ENDIF > >ELSE: ; preds = %main_body > %536 = fcmp ogt float %525, 0.000000e+00 > %537 = select i1 %536, float 1.000000e+00, float %525 > %538 = fcmp oge float %537, 0.000000e+00 > %.op = fmul float %537, 0x4600000000000000 > %539 = select i1 %538, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp36.0 = phi float [ %535, %IF ], [ %539, %ELSE ] > %540 = fsub float %32, %28 > %541 = fsub float %33, %29 > %542 = fsub float %34, %30 > %543 = fsub float %35, %31 > %544 = fmul float %23, %68 > %545 = fmul float %544, 0x3FC5555560000000 > %546 = call float @llvm.AMDGPU.clamp.(float %545, float 0.000000e+00, float 1.000000e+00) > %547 = call float @llvm.fma.f32(float %546, float %540, float %28) > %548 = call float @llvm.fma.f32(float %546, float %541, float %29) > %549 = call float @llvm.fma.f32(float %546, float %542, float %30) > %550 = call float @llvm.fma.f32(float %546, float %543, float %31) > %551 = call float @llvm.fma.f32(float %36, float %548, float %temp36.0) > %552 = call float @llvm.fma.f32(float %36, float %550, float %temp36.0) > %553 = fmul float %531, %547 > %554 = fmul float %531, %549 > %555 = fsub float -0.000000e+00, %92 > %556 = call float @llvm.fma.f32(float %555, float %26, float 1.000000e+00) > %557 = fsub float -0.000000e+00, %93 > %558 = call float @llvm.fma.f32(float %557, float %27, float 1.000000e+00) > %559 = fadd float %551, %556 > %560 = fadd float %552, %558 > %561 = call float @llvm.sin.f32(float %559) > %562 = call float @llvm.sin.f32(float %560) > %563 = fmul float %553, %561 > %564 = fmul float %554, %562 > %565 = call float @llvm.fma.f32(float %544, float %22, float %563) > %566 = call float @llvm.fma.f32(float %564, float 2.500000e-01, float %565) > %567 = fmul float %113, %566 > %568 = fmul float %114, %566 > %569 = fmul float %115, %566 > %570 = call float @llvm.fma.f32(float %567, float %505, float %523) > %571 = call float @llvm.fma.f32(float %568, float %505, float %511) > %572 = call float @llvm.fma.f32(float %569, float %505, float %517) > 
%573 = fmul float %570, %570 > %574 = fmul float %571, %571 > %575 = fadd float %574, %573 > %576 = fmul float %572, %572 > %577 = fadd float %575, %576 > %578 = call float @llvm.AMDGPU.rsq.clamped.f32(float %577) > %579 = fmul float %578, %570 > %580 = fmul float %578, %571 > %581 = fmul float %578, %572 > %582 = fmul float %523, %523 > %583 = fmul float %511, %511 > %584 = fadd float %583, %582 > %585 = fmul float %517, %517 > %586 = fadd float %584, %585 > %587 = fsub float %62, %523 > %588 = fsub float %63, %511 > %589 = fsub float %64, %517 > %590 = call float @llvm.sqrt.f32(float %586) > %591 = fmul float %590, %579 > %592 = fmul float %590, %580 > %593 = fmul float %590, %581 > %594 = fmul float %45, %591 > %595 = fmul float %46, %592 > %596 = fadd float %594, %595 > %597 = fmul float %47, %593 > %598 = fadd float %596, %597 > %599 = fadd float %598, %48 > %600 = fmul float %599, %21 > %601 = fmul float %37, %591 > %602 = fmul float %38, %592 > %603 = fadd float %601, %602 > %604 = fmul float %39, %593 > %605 = fadd float %603, %604 > %606 = fadd float %605, %40 > %607 = fmul float %41, %591 > %608 = fmul float %42, %592 > %609 = fadd float %607, %608 > %610 = fmul float %43, %593 > %611 = fadd float %609, %610 > %612 = fadd float %611, %44 > %613 = fmul float %49, %591 > %614 = fmul float %50, %592 > %615 = fadd float %613, %614 > %616 = fmul float %51, %593 > %617 = fadd float %615, %616 > %618 = fadd float %617, %52 > %619 = bitcast i32 %11 to float > %620 = insertvalue <{ float, float, float }> undef, float %619, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %496, float %379, float %438, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %87, float %593, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %587, float %588, float %589, float %543) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %606, float %612, float %600, float %618) > ret <{ float, float, float }> %620 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT 
>DCL CONST[1][0..42] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { -0.1000, 0.0000, 1.0000, 1.0900} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 304, 672, 336} >IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 1.2500} >IMM[4] UINT32 {400, 240, 256, 368} >IMM[5] UINT32 {528, 384, 352, 272} >IMM[6] FLT32 { 2.0000, -1.0000, 158456325028528675187087900672.0000, -2.0000} >IMM[7] UINT32 {288, 0, 0, 0} >IMM[8] FLT32 { 3.0000, -0.0100, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx > 3: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 4: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 5: INEG TEMP[1].x, TEMP[1].xxxx > 6: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 7: AND TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz > 8: KILL_IF -TEMP[2].xxxx > 9: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[3].xyzz > 10: ADD TEMP[2], -TEMP[0].xyzx, TEMP[2].xxxx > 11: FMA TEMP[3], CONST[1][19].zzzz, TEMP[2], TEMP[0].xyzx > 12: MUL TEMP[0], TEMP[3], CONST[1][42].xyzx > 13: MUL TEMP[2], TEMP[0], CONST[1][21].xxxx > 14: DP3 TEMP[0].x, IN[0].xyzz, IN[0].xyzz > 15: RSQ TEMP[3].x, TEMP[0].xxxx > 16: MUL TEMP[3].xyz, TEMP[3].xxxx, IN[0].xyzz > 17: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz > 18: RSQ TEMP[4].x, TEMP[0].xxxx > 19: MUL TEMP[4].xyz, TEMP[4].xxxx, IN[2].xyzz > 20: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[4].xyzz > 21: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 22: MUL TEMP[0].x, TEMP[5].xxxx, IMM[0].wwww > 23: MUL TEMP[3].x, CONST[1][25].xxxx, IMM[3].wwww > 24: LG2 TEMP[5].x, TEMP[0].xxxx > 25: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[3].xxxx > 26: EX2 TEMP[0].x, TEMP[0].xxxx > 27: ADD TEMP[3], -CONST[1][15].yzwy, CONST[1][16].xyzx > 28: FMA TEMP[5], TEMP[0].xxxx, TEMP[3], CONST[1][15].yzwy > 29: MUL TEMP[3], TEMP[0].wyzw, TEMP[5] > 30: MUL TEMP[3], TEMP[3], CONST[1][19].xxxx > 31: MUL TEMP[5].xyz, CONST[1][23].wxyy, CONST[1][33].xxxx > 32: MOV TEMP[4].xzw, TEMP[5].xxyz > 33: MUL TEMP[5].x, CONST[1][24].xxxx, CONST[1][33].xxxx > 34: MOV TEMP[4].y, TEMP[5].xxxx > 35: ADD TEMP[4], TEMP[4], IN[1].xyxy > 36: MOV TEMP[5].xy, TEMP[4].xyyy > 37: TEX TEMP[5].yw, TEMP[5], SAMP[1], 2D > 38: FMA TEMP[5].xy, TEMP[5].ywww, IMM[6].xxxx, IMM[6].yyyy > 39: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][19].wwww > 40: FMA TEMP[5].xy, CONST[1][22].zwww, TEMP[4].zwww, TEMP[5].xyyy > 41: MOV TEMP[5].xy, TEMP[5].xyyy > 42: TEX TEMP[5].y, TEMP[5], SAMP[2], 2D > 43: ADD TEMP[5].x, -TEMP[5].yyyy, IMM[0].zzzz > 44: FMA TEMP[5].x, -TEMP[5].xxxx, CONST[1][22].yyyy, IMM[0].zzzz > 45: MUL TEMP[3], TEMP[5].xxxx, TEMP[3] > 46: ADD TEMP[4], CONST[1][17].xyzx, -CONST[1][18].xyzx > 47: FMA TEMP[0], TEMP[0].xxxx, TEMP[4], CONST[1][18].xyzx > 48: FMA TEMP[0], TEMP[2], TEMP[0], -TEMP[3].wyzw > 49: ADD TEMP[2].x, CONST[1][22].xxxx, IMM[0].zzzz > 50: FMA TEMP[4].x, TEMP[2].xxxx, CONST[1][21].yyyy, IN[3].wwww > 51: ADD TEMP[2].x, TEMP[4].xxxx, IMM[6].yyyy > 52: FSNE TEMP[4].x, CONST[1][22].xxxx, IMM[0].yyyy > 53: UIF TEMP[4].xxxx :0 > 54: RCP TEMP[4].x, CONST[1][22].xxxx > 55: ELSE :0 > 56: MOV TEMP[4].x, IMM[6].zzzz > 57: ENDIF > 58: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[2].xxxx > 59: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 60: FMA TEMP[5].x, TEMP[4].xxxx, IMM[6].wwww, IMM[8].xxxx > 61: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[4].xxxx > 62: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx > 63: LG2 TEMP[4].x, TEMP[2].xxxx > 64: MUL TEMP[2].x, TEMP[4].xxxx, CONST[1][21].wwww > 65: EX2 TEMP[2].x, TEMP[2].xxxx > 66: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz > 67: FMA TEMP[0], TEMP[2].xxxx, TEMP[0], TEMP[3] > 68: ADD TEMP[2].x, TEMP[0].wwww, IMM[8].yyyy > 69: FSLT 
TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 70: AND TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx > 71: INEG TEMP[2].x, TEMP[2].xxxx > 72: USNE TEMP[1].x, TEMP[2].xxxx, IMM[2].xxxx > 73: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 74: KILL_IF -TEMP[1].xxxx > 75: MOV OUT[0], TEMP[0] > 76: END >radeonsi: Compiling shader 171 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 364) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 528) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 672) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 676) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 680) > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 3 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, 
i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 > %67 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %68 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %67, i64 0, i64 7 > %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 > %70 = extractelement <8 x i32> %66, i32 7 > %71 = extractelement <4 x i32> %69, i32 0 > %72 = and i32 %71, %70 > %73 = insertelement <4 x i32> %69, i32 %72, i32 0 > %74 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 > %76 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %77 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %76, i64 0, i64 11 > %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 > %79 = extractelement <8 x i32> %75, i32 7 > %80 = extractelement <4 x i32> %78, i32 0 > %81 = and i32 %80, %79 > %82 = insertelement <4 x i32> %78, i32 %81, i32 0 > %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %92 = bitcast float %86 to i32 > %93 = bitcast float %87 to i32 > %94 = insertelement <2 x i32> undef, i32 %92, i32 0 > %95 = insertelement <2 x i32> %94, i32 %93, i32 1 > %96 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %95, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %97 = extractelement <4 x float> %96, i32 0 > %98 = extractelement <4 x float> %96, i32 1 > %99 = extractelement <4 x float> %96, i32 2 > %100 = extractelement <4 x float> %96, i32 3 > %101 = fadd float %100, 0xBFB99999A0000000 > %102 = fcmp olt float %101, 0.000000e+00 > %103 = select i1 %102, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %103) > %104 = fmul float %97, 0x3FD3333340000000 > %105 = fmul float %98, 0x3FE2E147A0000000 > %106 = fadd float %105, %104 > %107 = fmul float %99, 0x3FBC28F5C0000000 > %108 = fadd float %106, %107 > %109 = fsub float %108, %97 > %110 = fsub float %108, %98 > %111 = fsub float %108, %99 > %112 = fsub float %108, %97 > %113 = call float @llvm.fma.f32(float %38, float %109, float %97) > %114 = call float @llvm.fma.f32(float %38, float %110, float %98) > %115 = call float @llvm.fma.f32(float %38, float %111, float %99) > %116 = call float @llvm.fma.f32(float %38, float %112, float %97) > %117 = fmul float %113, %53 > %118 = fmul float %114, %54 > %119 = fmul float %115, %55 > %120 = fmul float %116, %53 > %121 = fmul float %117, %40 > %122 = fmul float %118, %40 > %123 = fmul float %119, %40 > %124 = fmul float %120, %40 > %125 = fmul float %83, %83 > %126 = fmul float %84, %84 > %127 = fadd float %126, %125 > %128 = fmul float %85, %85 > %129 = fadd float 
%127, %128 > %130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129) > %131 = fmul float %130, %83 > %132 = fmul float %130, %84 > %133 = fmul float %130, %85 > %134 = fmul float %88, %88 > %135 = fmul float %89, %89 > %136 = fadd float %135, %134 > %137 = fmul float %90, %90 > %138 = fadd float %136, %137 > %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) > %140 = fmul float %139, %88 > %141 = fmul float %139, %89 > %142 = fmul float %139, %90 > %143 = fmul float %131, %140 > %144 = fmul float %132, %141 > %145 = fadd float %144, %143 > %146 = fmul float %133, %142 > %147 = fadd float %145, %146 > %148 = call float @llvm.AMDGPU.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) > %149 = fmul float %148, 0x3FF170A3E0000000 > %150 = fmul float %51, 1.250000e+00 > %151 = call float @llvm.log2.f32(float %149) > %152 = fmul float %151, %150 > %153 = call float @llvm.exp2.f32(float %152) > %154 = fsub float %28, %25 > %155 = fsub float %29, %26 > %156 = fsub float %30, %27 > %157 = fsub float %28, %25 > %158 = call float @llvm.fma.f32(float %153, float %154, float %25) > %159 = call float @llvm.fma.f32(float %153, float %155, float %26) > %160 = call float @llvm.fma.f32(float %153, float %156, float %27) > %161 = call float @llvm.fma.f32(float %153, float %157, float %25) > %162 = fmul float %120, %158 > %163 = fmul float %118, %159 > %164 = fmul float %119, %160 > %165 = fmul float %120, %161 > %166 = fmul float %162, %37 > %167 = fmul float %163, %37 > %168 = fmul float %164, %37 > %169 = fmul float %165, %37 > %170 = fmul float %49, %52 > %171 = fmul float %47, %52 > %172 = fmul float %48, %52 > %173 = fmul float %50, %52 > %174 = fadd float %170, %86 > %175 = fadd float %173, %87 > %176 = fadd float %171, %86 > %177 = fadd float %172, %87 > %178 = bitcast float %174 to i32 > %179 = bitcast float %175 to i32 > %180 = insertelement <2 x i32> undef, i32 %178, i32 0 > %181 = insertelement <2 x i32> %180, i32 %179, i32 1 > %182 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %181, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %183 = extractelement <4 x float> %182, i32 1 > %184 = extractelement <4 x float> %182, i32 3 > %185 = call float @llvm.fma.f32(float %183, float 2.000000e+00, float -1.000000e+00) > %186 = call float @llvm.fma.f32(float %184, float 2.000000e+00, float -1.000000e+00) > %187 = fmul float %185, %39 > %188 = fmul float %186, %39 > %189 = call float @llvm.fma.f32(float %45, float %176, float %187) > %190 = call float @llvm.fma.f32(float %46, float %177, float %188) > %191 = bitcast float %189 to i32 > %192 = bitcast float %190 to i32 > %193 = insertelement <2 x i32> undef, i32 %191, i32 0 > %194 = insertelement <2 x i32> %193, i32 %192, i32 1 > %195 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %194, <8 x i32> %75, <4 x i32> %82, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %196 = extractelement <4 x float> %195, i32 1 > %197 = fsub float 1.000000e+00, %196 > %198 = fsub float -0.000000e+00, %197 > %199 = call float @llvm.fma.f32(float %198, float %44, float 1.000000e+00) > %200 = fmul float %199, %166 > %201 = fmul float %199, %167 > %202 = fmul float %199, %168 > %203 = fmul float %199, %169 > %204 = fsub float %31, %34 > %205 = fsub float %32, %35 > %206 = fsub float %33, %36 > %207 = fsub float %31, %34 > %208 = call float @llvm.fma.f32(float %153, float %204, float %34) > %209 = call float @llvm.fma.f32(float %153, float %205, float %35) > %210 = call float @llvm.fma.f32(float 
%153, float %206, float %36) > %211 = call float @llvm.fma.f32(float %153, float %207, float %34) > %212 = fsub float -0.000000e+00, %203 > %213 = call float @llvm.fma.f32(float %121, float %208, float %212) > %214 = fsub float -0.000000e+00, %201 > %215 = call float @llvm.fma.f32(float %122, float %209, float %214) > %216 = fsub float -0.000000e+00, %202 > %217 = call float @llvm.fma.f32(float %123, float %210, float %216) > %218 = fsub float -0.000000e+00, %203 > %219 = call float @llvm.fma.f32(float %124, float %211, float %218) > %220 = fadd float %43, 1.000000e+00 > %221 = call float @llvm.fma.f32(float %220, float %41, float %91) > %222 = fadd float %221, -1.000000e+00 > %223 = fcmp une float %43, 0.000000e+00 > %224 = fdiv float 1.000000e+00, %43 > %temp16.0 = select i1 %223, float %224, float 0x4600000000000000 > %225 = fmul float %temp16.0, %222 > %226 = call float @llvm.AMDGPU.clamp.(float %225, float 0.000000e+00, float 1.000000e+00) > %227 = call float @llvm.fma.f32(float %226, float -2.000000e+00, float 3.000000e+00) > %228 = fmul float %226, %226 > %229 = fmul float %228, %227 > %230 = call float @llvm.log2.f32(float %229) > %231 = fmul float %230, %42 > %232 = call float @llvm.exp2.f32(float %231) > %233 = call float @llvm.minnum.f32(float %232, float 1.000000e+00) > %234 = call float @llvm.fma.f32(float %233, float %213, float %200) > %235 = call float @llvm.fma.f32(float %233, float %215, float %201) > %236 = call float @llvm.fma.f32(float %233, float %217, float %202) > %237 = call float @llvm.fma.f32(float %233, float %219, float %203) > %238 = fadd float %237, 0xBF847AE140000000 > %239 = fcmp olt float %238, 0.000000e+00 > %240 = select i1 %239, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %240) > %241 = bitcast float %5 to i32 > %242 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %241, 10 > %243 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %242, float %234, 11 > %244 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %243, float %235, 12 > %245 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %244, float %236, 13 > %246 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %245, float %237, 14 > %247 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %246, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %247 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void 
@llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL CONST[1][0..3] >DCL CONST[2][0..4095] >DCL CONST[3][0..25] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {48, 2, 400, 0} > 0: MUL TEMP[0].xyz, IN[2].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[1].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[1].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[1].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] 
> 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[1].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[1].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[1].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[1].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[1].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL 
TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[1].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[1].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[1].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[1].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[1].zzzz >205: MUL TEMP[4].x, IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, 
IMM[2].yyyy >211: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[1].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[17].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[17].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[1].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[1].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[1].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[1].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[1].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[1].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[1].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[1].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[1].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, IN[1].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, 
TEMP[5].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[1].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[1].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[1].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[1].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][0], TEMP[3] >356: DP4 TEMP[2].x, CONST[1][1], TEMP[3] >357: MOV TEMP[1].y, TEMP[2].xxxx >358: DP4 TEMP[0].x, CONST[1][2], TEMP[3] >359: DP4 TEMP[2].x, CONST[1][3], TEMP[3] >360: MIN TEMP[0].x, TEMP[0].xxxx, CONST[3][25].zzzz >361: MOV TEMP[1].z, TEMP[0].xxxx >362: MOV TEMP[1].w, TEMP[2].xxxx >363: MOV OUT[0], TEMP[1] >364: END >radeonsi: Compiling shader 172 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, 
i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 408) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %13) > %42 = extractelement <4 x float> %41, i32 0 > %43 = extractelement <4 x float> %41, i32 1 > %44 = extractelement <4 x float> %41, i32 2 > %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %14) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %15) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = fmul float %56, 0x406FE01000000000 > %58 = fmul float %55, 0x406FE01000000000 > %59 = fmul float %54, 0x406FE01000000000 > %60 = fptosi float %57 to i32 > %61 = fptosi float %58 to i32 > %62 = fptosi float %59 to i32 > %63 = shl i32 %60, 1 > %64 = or i32 %63, 1 > %65 = shl i32 %61, 1 > %66 = or i32 %65, 1 > %67 = shl i32 %62, 1 > %68 = or i32 %67, 1 > %69 = shl i32 %60, 5 > %70 = or i32 %69, 4 > %71 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %70) > %72 = fmul float %48, %71 > %73 = shl i32 %61, 5 > %74 = or i32 %73, 4 > %75 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %74) > %76 = fmul float %49, %75 > %77 = shl i32 %64, 4 > %78 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %77) > %79 = shl i32 %64, 4 > %80 = or i32 %79, 12 > %81 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %80) > %82 = fmul float %78, %81 > %83 = 
shl i32 %64, 4 > %84 = or i32 %83, 4 > %85 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %84) > %86 = shl i32 %64, 4 > %87 = or i32 %86, 8 > %88 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %87) > %89 = fsub float -0.000000e+00, %82 > %90 = call float @llvm.fma.f32(float %85, float %88, float %89) > %91 = shl i32 %64, 4 > %92 = or i32 %91, 4 > %93 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %92) > %94 = shl i32 %64, 4 > %95 = or i32 %94, 8 > %96 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %95) > %97 = call float @llvm.fma.f32(float %93, float %96, float %82) > %98 = fmul float %97, %48 > %99 = fmul float %90, %48 > %100 = fmul float %99, 2.000000e+00 > %101 = shl i32 %66, 4 > %102 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %101) > %103 = shl i32 %66, 4 > %104 = or i32 %103, 12 > %105 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %104) > %106 = fmul float %102, %105 > %107 = shl i32 %66, 4 > %108 = or i32 %107, 4 > %109 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %108) > %110 = shl i32 %66, 4 > %111 = or i32 %110, 8 > %112 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %111) > %113 = fsub float -0.000000e+00, %106 > %114 = call float @llvm.fma.f32(float %109, float %112, float %113) > %115 = shl i32 %66, 4 > %116 = or i32 %115, 4 > %117 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %116) > %118 = shl i32 %66, 4 > %119 = or i32 %118, 8 > %120 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %119) > %121 = call float @llvm.fma.f32(float %117, float %120, float %106) > %122 = fmul float %121, %49 > %123 = fmul float %122, 2.000000e+00 > %124 = fmul float %114, %49 > %125 = fmul float %124, 2.000000e+00 > %126 = shl i32 %64, 4 > %127 = or i32 %126, 4 > %128 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %127) > %129 = shl i32 %64, 4 > %130 = or i32 %129, 8 > %131 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %130) > %132 = shl i32 %64, 4 > %133 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %132) > %134 = shl i32 %64, 4 > %135 = or i32 %134, 12 > %136 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %135) > %137 = fmul float %131, %136 > %138 = fmul float %131, %133 > %139 = fmul float %128, %136 > %140 = shl i32 %64, 4 > %141 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %140) > %142 = shl i32 %64, 4 > %143 = or i32 %142, 4 > %144 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %143) > %145 = call float @llvm.fma.f32(float %141, float %144, float %137) > %146 = fmul float %145, %48 > %147 = fmul float %146, 2.000000e+00 > %148 = shl i32 %64, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %148) > %150 = shl i32 %64, 4 > %151 = or i32 %150, 4 > %152 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %151) > %153 = shl i32 %64, 4 > %154 = or i32 %153, 8 > %155 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %154) > %156 = shl i32 %64, 4 > %157 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %156) > %158 = shl i32 %64, 4 > %159 = or i32 %158, 4 > %160 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %159) > %161 = shl i32 %64, 4 > %162 = or i32 %161, 8 > %163 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %162) > %164 = fmul float %149, %157 > %165 = fmul float %152, %160 > %166 = fmul float %155, %163 > %167 = fadd float %166, %165 > %168 = fadd float %166, %164 > %169 = fadd float %165, %164 > %170 = fsub float -0.000000e+00, %167 > %171 = call float @llvm.fma.f32(float %170, float 2.000000e+00, float 1.000000e+00) > %172 = fsub float -0.000000e+00, %168 > %173 = call 
float @llvm.fma.f32(float %172, float 2.000000e+00, float 1.000000e+00) > %174 = fsub float -0.000000e+00, %169 > %175 = call float @llvm.fma.f32(float %174, float 2.000000e+00, float 1.000000e+00) > %176 = fmul float %48, %173 > %177 = shl i32 %66, 4 > %178 = or i32 %177, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %178) > %180 = shl i32 %66, 4 > %181 = or i32 %180, 8 > %182 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %181) > %183 = shl i32 %66, 4 > %184 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %183) > %185 = shl i32 %66, 4 > %186 = or i32 %185, 12 > %187 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %186) > %188 = fmul float %182, %187 > %189 = fmul float %182, %184 > %190 = fmul float %179, %187 > %191 = shl i32 %66, 4 > %192 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %191) > %193 = shl i32 %66, 4 > %194 = or i32 %193, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %194) > %196 = call float @llvm.fma.f32(float %192, float %195, float %188) > %197 = fmul float %196, %49 > %198 = fmul float %197, 2.000000e+00 > %199 = shl i32 %66, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %199) > %201 = shl i32 %66, 4 > %202 = or i32 %201, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %202) > %204 = shl i32 %66, 4 > %205 = or i32 %204, 8 > %206 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %205) > %207 = shl i32 %66, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %207) > %209 = shl i32 %66, 4 > %210 = or i32 %209, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %210) > %212 = shl i32 %66, 4 > %213 = or i32 %212, 8 > %214 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %213) > %215 = fmul float %200, %208 > %216 = fmul float %203, %211 > %217 = fmul float %206, %214 > %218 = fadd float %217, %216 > %219 = fadd float %217, %215 > %220 = fadd float %216, %215 > %221 = fsub float -0.000000e+00, %218 > %222 = call float @llvm.fma.f32(float %221, float 2.000000e+00, float 1.000000e+00) > %223 = fsub float -0.000000e+00, %219 > %224 = call float @llvm.fma.f32(float %223, float 2.000000e+00, float 1.000000e+00) > %225 = fsub float -0.000000e+00, %220 > %226 = call float @llvm.fma.f32(float %225, float 2.000000e+00, float 1.000000e+00) > %227 = fmul float %49, %224 > %228 = fadd float %147, %198 > %229 = fadd float %176, %227 > %230 = fadd float %100, %125 > %231 = fadd float %72, %76 > %232 = shl i32 %62, 5 > %233 = or i32 %232, 4 > %234 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %233) > %235 = fmul float %50, %234 > %236 = shl i32 %68, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %236) > %238 = shl i32 %68, 4 > %239 = or i32 %238, 12 > %240 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %239) > %241 = fmul float %237, %240 > %242 = shl i32 %68, 4 > %243 = or i32 %242, 4 > %244 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %243) > %245 = shl i32 %68, 4 > %246 = or i32 %245, 8 > %247 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %246) > %248 = fsub float -0.000000e+00, %241 > %249 = call float @llvm.fma.f32(float %244, float %247, float %248) > %250 = shl i32 %68, 4 > %251 = or i32 %250, 4 > %252 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %251) > %253 = shl i32 %68, 4 > %254 = or i32 %253, 8 > %255 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %254) > %256 = call float @llvm.fma.f32(float %252, float %255, float %241) > %257 = fmul float %256, %50 > %258 = fmul float %257, 2.000000e+00 > %259 = fmul float %249, %50 > %260 = 
fmul float %259, 2.000000e+00 > %261 = shl i32 %68, 4 > %262 = or i32 %261, 4 > %263 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %262) > %264 = shl i32 %68, 4 > %265 = or i32 %264, 8 > %266 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %265) > %267 = shl i32 %68, 4 > %268 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %267) > %269 = shl i32 %68, 4 > %270 = or i32 %269, 12 > %271 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %270) > %272 = fmul float %266, %271 > %273 = fmul float %266, %268 > %274 = fmul float %263, %271 > %275 = shl i32 %68, 4 > %276 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %275) > %277 = shl i32 %68, 4 > %278 = or i32 %277, 4 > %279 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %278) > %280 = call float @llvm.fma.f32(float %276, float %279, float %272) > %281 = fmul float %280, %50 > %282 = fmul float %281, 2.000000e+00 > %283 = shl i32 %68, 4 > %284 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %283) > %285 = shl i32 %68, 4 > %286 = or i32 %285, 4 > %287 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %286) > %288 = shl i32 %68, 4 > %289 = or i32 %288, 8 > %290 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %289) > %291 = shl i32 %68, 4 > %292 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %291) > %293 = shl i32 %68, 4 > %294 = or i32 %293, 4 > %295 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %294) > %296 = shl i32 %68, 4 > %297 = or i32 %296, 8 > %298 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %297) > %299 = fmul float %284, %292 > %300 = fmul float %287, %295 > %301 = fmul float %290, %298 > %302 = fadd float %301, %300 > %303 = fadd float %301, %299 > %304 = fadd float %300, %299 > %305 = fsub float -0.000000e+00, %302 > %306 = call float @llvm.fma.f32(float %305, float 2.000000e+00, float 1.000000e+00) > %307 = fsub float -0.000000e+00, %303 > %308 = call float @llvm.fma.f32(float %307, float 2.000000e+00, float 1.000000e+00) > %309 = fsub float -0.000000e+00, %304 > %310 = call float @llvm.fma.f32(float %309, float 2.000000e+00, float 1.000000e+00) > %311 = fmul float %50, %308 > %312 = fadd float %228, %282 > %313 = fadd float %229, %311 > %314 = fadd float %230, %260 > %315 = fadd float %231, %235 > %316 = fmul float %312, %42 > %317 = fmul float %313, %43 > %318 = fadd float %316, %317 > %319 = fmul float %314, %44 > %320 = fadd float %318, %319 > %321 = fadd float %320, %315 > %322 = shl i32 %64, 4 > %323 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %322) > %324 = shl i32 %64, 4 > %325 = or i32 %324, 8 > %326 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %325) > %327 = fsub float -0.000000e+00, %139 > %328 = call float @llvm.fma.f32(float %323, float %326, float %327) > %329 = fmul float %328, %48 > %330 = fmul float %329, 2.000000e+00 > %331 = fmul float %98, 2.000000e+00 > %332 = shl i32 %66, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %332) > %334 = shl i32 %66, 4 > %335 = or i32 %334, 8 > %336 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %335) > %337 = fsub float -0.000000e+00, %190 > %338 = call float @llvm.fma.f32(float %333, float %336, float %337) > %339 = fmul float %338, %49 > %340 = fmul float %339, 2.000000e+00 > %341 = fmul float %48, %175 > %342 = fmul float %48, %171 > %343 = fmul float %49, %226 > %344 = fmul float %49, %222 > %345 = shl i32 %60, 5 > %346 = or i32 %345, 8 > %347 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %346) > %348 = fmul float %48, %347 > %349 = shl i32 %61, 5 > %350 = or i32 %349, 8 > %351 = call float 
@llvm.SI.load.const(<16 x i8> %35, i32 %350) > %352 = fmul float %49, %351 > %353 = fadd float %340, %330 > %354 = fadd float %123, %331 > %355 = fadd float %343, %341 > %356 = fadd float %352, %348 > %357 = shl i32 %68, 4 > %358 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %357) > %359 = shl i32 %68, 4 > %360 = or i32 %359, 8 > %361 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %360) > %362 = fsub float -0.000000e+00, %274 > %363 = call float @llvm.fma.f32(float %358, float %361, float %362) > %364 = fmul float %363, %50 > %365 = fmul float %364, 2.000000e+00 > %366 = fmul float %50, %310 > %367 = fmul float %50, %306 > %368 = shl i32 %62, 5 > %369 = or i32 %368, 8 > %370 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %369) > %371 = fmul float %50, %370 > %372 = fadd float %353, %365 > %373 = fadd float %354, %258 > %374 = fadd float %355, %366 > %375 = fadd float %356, %371 > %376 = fmul float %372, %42 > %377 = fmul float %373, %43 > %378 = fadd float %376, %377 > %379 = fmul float %374, %44 > %380 = fadd float %378, %379 > %381 = fadd float %380, %375 > %382 = shl i32 %60, 5 > %383 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %382) > %384 = fmul float %48, %383 > %385 = shl i32 %61, 5 > %386 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %385) > %387 = fmul float %49, %386 > %388 = shl i32 %62, 5 > %389 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %388) > %390 = fmul float %50, %389 > %391 = shl i32 %64, 4 > %392 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %391) > %393 = shl i32 %64, 4 > %394 = or i32 %393, 4 > %395 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %394) > %396 = fsub float -0.000000e+00, %137 > %397 = call float @llvm.fma.f32(float %392, float %395, float %396) > %398 = fadd float %139, %138 > %399 = fmul float %397, %48 > %400 = fmul float %398, %48 > %401 = fmul float %399, 2.000000e+00 > %402 = fmul float %400, 2.000000e+00 > %403 = shl i32 %66, 4 > %404 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %403) > %405 = shl i32 %66, 4 > %406 = or i32 %405, 4 > %407 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %406) > %408 = fsub float -0.000000e+00, %188 > %409 = call float @llvm.fma.f32(float %404, float %407, float %408) > %410 = shl i32 %68, 4 > %411 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %410) > %412 = shl i32 %68, 4 > %413 = or i32 %412, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %413) > %415 = fsub float -0.000000e+00, %272 > %416 = call float @llvm.fma.f32(float %411, float %414, float %415) > %417 = fadd float %274, %273 > %418 = fmul float %409, %49 > %419 = fmul float %416, %50 > %420 = fmul float %417, %50 > %421 = fmul float %419, 2.000000e+00 > %422 = fmul float %420, 2.000000e+00 > %423 = fadd float %190, %189 > %424 = fmul float %423, %49 > %425 = fmul float %418, 2.000000e+00 > %426 = fmul float %424, 2.000000e+00 > %427 = fadd float %342, %344 > %428 = fadd float %401, %425 > %429 = fadd float %402, %426 > %430 = fadd float %384, %387 > %431 = fadd float %367, %427 > %432 = fadd float %421, %428 > %433 = fadd float %422, %429 > %434 = fadd float %390, %430 > %435 = fmul float %431, %42 > %436 = fmul float %432, %43 > %437 = fadd float %435, %436 > %438 = fmul float %433, %44 > %439 = fadd float %437, %438 > %440 = fadd float %439, %434 > %441 = fmul float %18, %440 > %442 = fmul float %19, %321 > %443 = fadd float %441, %442 > %444 = fmul float %20, %381 > %445 = fadd float %443, %444 > %446 = fadd float %445, %21 > %447 = fmul float %22, %440 > %448 = fmul float 
%23, %321 > %449 = fadd float %447, %448 > %450 = fmul float %24, %381 > %451 = fadd float %449, %450 > %452 = fadd float %451, %25 > %453 = fmul float %26, %440 > %454 = fmul float %27, %321 > %455 = fadd float %453, %454 > %456 = fmul float %28, %381 > %457 = fadd float %455, %456 > %458 = fadd float %457, %29 > %459 = fmul float %30, %440 > %460 = fmul float %31, %321 > %461 = fadd float %459, %460 > %462 = fmul float %32, %381 > %463 = fadd float %461, %462 > %464 = fadd float %463, %33 > %465 = call float @llvm.minnum.f32(float %458, float %38) > %466 = bitcast i32 %11 to float > %467 = insertvalue <{ float, float, float }> undef, float %466, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %446, float %452, float %465, float %464) > ret <{ float, float, float }> %467 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {48, 112, 128, 144} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, 
TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL 
ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: 
MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, 
TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV 
TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][0], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][1], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][2], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: DP4 TEMP[5].x, CONST[1][3], TEMP[18] >361: MOV TEMP[3].w, TEMP[5].xxxx >362: MOV TEMP[5].xy, IN[2].xyxx >363: DP3 TEMP[6].x, TEMP[4].xyzz, IN[3].xyzz >364: MOV TEMP[1].y, TEMP[6].xxxx >365: DP3 TEMP[6].x, TEMP[7].xyzz, IN[3].xyzz >366: MOV TEMP[1].z, TEMP[6].xxxx >367: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >368: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz >369: RSQ TEMP[6].x, TEMP[6].xxxx >370: MUL TEMP[1].xyz, TEMP[6].xxxx, TEMP[1].xyzz >371: DP3 TEMP[6].x, CONST[1][7].xyzz, TEMP[1].xyzz >372: DP3 TEMP[8].x, TEMP[4].xyzz, IN[4].xyzz >373: MOV TEMP[2].y, TEMP[8].xxxx >374: DP3 TEMP[8].x, TEMP[4].xyzz, IN[1].xyzz >375: MOV TEMP[4].y, TEMP[8].xxxx >376: DP3 TEMP[8].x, TEMP[7].xyzz, IN[4].xyzz >377: MOV TEMP[2].z, TEMP[8].xxxx >378: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >379: MOV TEMP[4].z, TEMP[7].xxxx >380: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >381: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >382: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >383: RSQ TEMP[7].x, TEMP[0].xxxx >384: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >385: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >386: MOV TEMP[6].y, TEMP[7].xxxx >387: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >388: RSQ TEMP[7].x, TEMP[7].xxxx >389: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >390: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >391: MOV TEMP[6].z, TEMP[4].xxxx >392: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >393: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >394: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >395: MOV TEMP[4].y, TEMP[7].xxxx >396: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >397: MOV TEMP[1].y, TEMP[0].xxxx >398: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >399: MOV TEMP[4].z, TEMP[0].xxxx >400: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >401: MOV TEMP[1].z, TEMP[0].xxxx >402: MOV OUT[4], TEMP[1] >403: MOV OUT[3], TEMP[4] >404: MOV OUT[2], TEMP[6] >405: MOV OUT[1], TEMP[5] >406: MOV OUT[0], TEMP[3] >407: END >radeonsi: Compiling shader 173 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float 
@llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %13) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %15) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %16) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %17) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %19) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %20) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> 
%87, i32 1 > %90 = extractelement <4 x float> %87, i32 2 > %91 = fmul float %90, 0x406FE01000000000 > %92 = fmul float %89, 0x406FE01000000000 > %93 = fmul float %88, 0x406FE01000000000 > %94 = fptosi float %91 to i32 > %95 = fptosi float %92 to i32 > %96 = fptosi float %93 to i32 > %97 = shl i32 %94, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %95, 1 > %100 = or i32 %99, 1 > %101 = shl i32 %96, 1 > %102 = or i32 %101, 1 > %103 = shl i32 %94, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %104) > %106 = fmul float %82, %105 > %107 = shl i32 %95, 5 > %108 = or i32 %107, 4 > %109 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %108) > %110 = fmul float %83, %109 > %111 = shl i32 %98, 4 > %112 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %111) > %113 = shl i32 %98, 4 > %114 = or i32 %113, 12 > %115 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %114) > %116 = fmul float %112, %115 > %117 = shl i32 %98, 4 > %118 = or i32 %117, 4 > %119 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %118) > %120 = shl i32 %98, 4 > %121 = or i32 %120, 8 > %122 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %121) > %123 = fsub float -0.000000e+00, %116 > %124 = call float @llvm.fma.f32(float %119, float %122, float %123) > %125 = shl i32 %98, 4 > %126 = or i32 %125, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %126) > %128 = shl i32 %98, 4 > %129 = or i32 %128, 8 > %130 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %129) > %131 = call float @llvm.fma.f32(float %127, float %130, float %116) > %132 = fmul float %131, %82 > %133 = fmul float %124, %82 > %134 = fmul float %133, 2.000000e+00 > %135 = shl i32 %100, 4 > %136 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %135) > %137 = shl i32 %100, 4 > %138 = or i32 %137, 12 > %139 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %138) > %140 = fmul float %136, %139 > %141 = shl i32 %100, 4 > %142 = or i32 %141, 4 > %143 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %142) > %144 = shl i32 %100, 4 > %145 = or i32 %144, 8 > %146 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %145) > %147 = fsub float -0.000000e+00, %140 > %148 = call float @llvm.fma.f32(float %143, float %146, float %147) > %149 = shl i32 %100, 4 > %150 = or i32 %149, 4 > %151 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %150) > %152 = shl i32 %100, 4 > %153 = or i32 %152, 8 > %154 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %153) > %155 = call float @llvm.fma.f32(float %151, float %154, float %140) > %156 = fmul float %155, %83 > %157 = fmul float %156, 2.000000e+00 > %158 = fmul float %148, %83 > %159 = fmul float %158, 2.000000e+00 > %160 = shl i32 %98, 4 > %161 = or i32 %160, 4 > %162 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %161) > %163 = shl i32 %98, 4 > %164 = or i32 %163, 8 > %165 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %164) > %166 = shl i32 %98, 4 > %167 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %166) > %168 = shl i32 %98, 4 > %169 = or i32 %168, 12 > %170 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %169) > %171 = fmul float %165, %170 > %172 = fmul float %165, %167 > %173 = fmul float %162, %170 > %174 = shl i32 %98, 4 > %175 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %174) > %176 = shl i32 %98, 4 > %177 = or i32 %176, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %177) > %179 = call float @llvm.fma.f32(float %175, float %178, float %171) > %180 = fmul float %179, %82 > %181 = fmul float %180, 2.000000e+00 > %182 = 
shl i32 %98, 4 > %183 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %182) > %184 = shl i32 %98, 4 > %185 = or i32 %184, 4 > %186 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %185) > %187 = shl i32 %98, 4 > %188 = or i32 %187, 8 > %189 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %188) > %190 = shl i32 %98, 4 > %191 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %190) > %192 = shl i32 %98, 4 > %193 = or i32 %192, 4 > %194 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %193) > %195 = shl i32 %98, 4 > %196 = or i32 %195, 8 > %197 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %196) > %198 = fmul float %183, %191 > %199 = fmul float %186, %194 > %200 = fmul float %189, %197 > %201 = fadd float %200, %199 > %202 = fadd float %200, %198 > %203 = fadd float %199, %198 > %204 = fsub float -0.000000e+00, %201 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fsub float -0.000000e+00, %202 > %207 = call float @llvm.fma.f32(float %206, float 2.000000e+00, float 1.000000e+00) > %208 = fsub float -0.000000e+00, %203 > %209 = call float @llvm.fma.f32(float %208, float 2.000000e+00, float 1.000000e+00) > %210 = fmul float %82, %207 > %211 = shl i32 %100, 4 > %212 = or i32 %211, 4 > %213 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %212) > %214 = shl i32 %100, 4 > %215 = or i32 %214, 8 > %216 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %215) > %217 = shl i32 %100, 4 > %218 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %217) > %219 = shl i32 %100, 4 > %220 = or i32 %219, 12 > %221 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %220) > %222 = fmul float %216, %221 > %223 = fmul float %216, %218 > %224 = fmul float %213, %221 > %225 = shl i32 %100, 4 > %226 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %225) > %227 = shl i32 %100, 4 > %228 = or i32 %227, 4 > %229 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %228) > %230 = call float @llvm.fma.f32(float %226, float %229, float %222) > %231 = fmul float %230, %83 > %232 = fmul float %231, 2.000000e+00 > %233 = shl i32 %100, 4 > %234 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %233) > %235 = shl i32 %100, 4 > %236 = or i32 %235, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %236) > %238 = shl i32 %100, 4 > %239 = or i32 %238, 8 > %240 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %239) > %241 = shl i32 %100, 4 > %242 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %241) > %243 = shl i32 %100, 4 > %244 = or i32 %243, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %244) > %246 = shl i32 %100, 4 > %247 = or i32 %246, 8 > %248 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %247) > %249 = fmul float %234, %242 > %250 = fmul float %237, %245 > %251 = fmul float %240, %248 > %252 = fadd float %251, %250 > %253 = fadd float %251, %249 > %254 = fadd float %250, %249 > %255 = fsub float -0.000000e+00, %252 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fsub float -0.000000e+00, %253 > %258 = call float @llvm.fma.f32(float %257, float 2.000000e+00, float 1.000000e+00) > %259 = fsub float -0.000000e+00, %254 > %260 = call float @llvm.fma.f32(float %259, float 2.000000e+00, float 1.000000e+00) > %261 = fmul float %83, %258 > %262 = fadd float %181, %232 > %263 = fadd float %210, %261 > %264 = fadd float %134, %159 > %265 = fadd float %106, %110 > %266 = shl i32 %96, 5 > %267 = or i32 %266, 4 > %268 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %267) > 
%269 = fmul float %84, %268 > %270 = shl i32 %102, 4 > %271 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %270) > %272 = shl i32 %102, 4 > %273 = or i32 %272, 12 > %274 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %273) > %275 = fmul float %271, %274 > %276 = shl i32 %102, 4 > %277 = or i32 %276, 4 > %278 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %277) > %279 = shl i32 %102, 4 > %280 = or i32 %279, 8 > %281 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %280) > %282 = fsub float -0.000000e+00, %275 > %283 = call float @llvm.fma.f32(float %278, float %281, float %282) > %284 = shl i32 %102, 4 > %285 = or i32 %284, 4 > %286 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %285) > %287 = shl i32 %102, 4 > %288 = or i32 %287, 8 > %289 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %288) > %290 = call float @llvm.fma.f32(float %286, float %289, float %275) > %291 = fmul float %290, %84 > %292 = fmul float %291, 2.000000e+00 > %293 = fmul float %283, %84 > %294 = fmul float %293, 2.000000e+00 > %295 = shl i32 %102, 4 > %296 = or i32 %295, 4 > %297 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %296) > %298 = shl i32 %102, 4 > %299 = or i32 %298, 8 > %300 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %299) > %301 = shl i32 %102, 4 > %302 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %301) > %303 = shl i32 %102, 4 > %304 = or i32 %303, 12 > %305 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %304) > %306 = fmul float %300, %305 > %307 = fmul float %300, %302 > %308 = fmul float %297, %305 > %309 = shl i32 %102, 4 > %310 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %309) > %311 = shl i32 %102, 4 > %312 = or i32 %311, 4 > %313 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %312) > %314 = call float @llvm.fma.f32(float %310, float %313, float %306) > %315 = fmul float %314, %84 > %316 = fmul float %315, 2.000000e+00 > %317 = shl i32 %102, 4 > %318 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %317) > %319 = shl i32 %102, 4 > %320 = or i32 %319, 4 > %321 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %320) > %322 = shl i32 %102, 4 > %323 = or i32 %322, 8 > %324 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %323) > %325 = shl i32 %102, 4 > %326 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %325) > %327 = shl i32 %102, 4 > %328 = or i32 %327, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %328) > %330 = shl i32 %102, 4 > %331 = or i32 %330, 8 > %332 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %331) > %333 = fmul float %318, %326 > %334 = fmul float %321, %329 > %335 = fmul float %324, %332 > %336 = fadd float %335, %334 > %337 = fadd float %335, %333 > %338 = fadd float %334, %333 > %339 = fsub float -0.000000e+00, %336 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fsub float -0.000000e+00, %337 > %342 = call float @llvm.fma.f32(float %341, float 2.000000e+00, float 1.000000e+00) > %343 = fsub float -0.000000e+00, %338 > %344 = call float @llvm.fma.f32(float %343, float 2.000000e+00, float 1.000000e+00) > %345 = fmul float %84, %342 > %346 = fadd float %262, %316 > %347 = fadd float %263, %345 > %348 = fadd float %264, %294 > %349 = fadd float %265, %269 > %350 = fmul float %346, %53 > %351 = fmul float %347, %54 > %352 = fadd float %350, %351 > %353 = fmul float %348, %55 > %354 = fadd float %352, %353 > %355 = fadd float %354, %349 > %356 = shl i32 %98, 4 > %357 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %356) > %358 = shl i32 %98, 
4 > %359 = or i32 %358, 8 > %360 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %359) > %361 = fsub float -0.000000e+00, %173 > %362 = call float @llvm.fma.f32(float %357, float %360, float %361) > %363 = fmul float %362, %82 > %364 = fmul float %363, 2.000000e+00 > %365 = fmul float %132, 2.000000e+00 > %366 = shl i32 %100, 4 > %367 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %366) > %368 = shl i32 %100, 4 > %369 = or i32 %368, 8 > %370 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %369) > %371 = fsub float -0.000000e+00, %224 > %372 = call float @llvm.fma.f32(float %367, float %370, float %371) > %373 = fmul float %372, %83 > %374 = fmul float %373, 2.000000e+00 > %375 = fmul float %82, %209 > %376 = fmul float %82, %205 > %377 = fmul float %83, %260 > %378 = fmul float %83, %256 > %379 = shl i32 %94, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %380) > %382 = fmul float %82, %381 > %383 = shl i32 %95, 5 > %384 = or i32 %383, 8 > %385 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %384) > %386 = fmul float %83, %385 > %387 = fadd float %374, %364 > %388 = fadd float %157, %365 > %389 = fadd float %377, %375 > %390 = fadd float %386, %382 > %391 = shl i32 %102, 4 > %392 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %391) > %393 = shl i32 %102, 4 > %394 = or i32 %393, 8 > %395 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %394) > %396 = fsub float -0.000000e+00, %308 > %397 = call float @llvm.fma.f32(float %392, float %395, float %396) > %398 = fmul float %397, %84 > %399 = fmul float %398, 2.000000e+00 > %400 = fmul float %84, %344 > %401 = fmul float %84, %340 > %402 = shl i32 %96, 5 > %403 = or i32 %402, 8 > %404 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %403) > %405 = fmul float %84, %404 > %406 = fadd float %387, %399 > %407 = fadd float %388, %292 > %408 = fadd float %389, %400 > %409 = fadd float %390, %405 > %410 = fmul float %406, %53 > %411 = fmul float %407, %54 > %412 = fadd float %410, %411 > %413 = fmul float %408, %55 > %414 = fadd float %412, %413 > %415 = fadd float %414, %409 > %416 = shl i32 %94, 5 > %417 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %416) > %418 = fmul float %82, %417 > %419 = shl i32 %95, 5 > %420 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %419) > %421 = fmul float %83, %420 > %422 = shl i32 %96, 5 > %423 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %422) > %424 = fmul float %84, %423 > %425 = shl i32 %98, 4 > %426 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %425) > %427 = shl i32 %98, 4 > %428 = or i32 %427, 4 > %429 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %428) > %430 = fsub float -0.000000e+00, %171 > %431 = call float @llvm.fma.f32(float %426, float %429, float %430) > %432 = fadd float %173, %172 > %433 = fmul float %431, %82 > %434 = fmul float %432, %82 > %435 = fmul float %433, 2.000000e+00 > %436 = fmul float %434, 2.000000e+00 > %437 = shl i32 %100, 4 > %438 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %437) > %439 = shl i32 %100, 4 > %440 = or i32 %439, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %440) > %442 = fsub float -0.000000e+00, %222 > %443 = call float @llvm.fma.f32(float %438, float %441, float %442) > %444 = shl i32 %102, 4 > %445 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %444) > %446 = shl i32 %102, 4 > %447 = or i32 %446, 4 > %448 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %447) > %449 = fsub float -0.000000e+00, %306 > %450 = call float @llvm.fma.f32(float 
%445, float %448, float %449) > %451 = fadd float %308, %307 > %452 = fmul float %443, %83 > %453 = fmul float %450, %84 > %454 = fmul float %451, %84 > %455 = fmul float %453, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %224, %223 > %458 = fmul float %457, %83 > %459 = fmul float %452, 2.000000e+00 > %460 = fmul float %458, 2.000000e+00 > %461 = fadd float %376, %378 > %462 = fadd float %435, %459 > %463 = fadd float %436, %460 > %464 = fadd float %418, %421 > %465 = fadd float %401, %461 > %466 = fadd float %455, %462 > %467 = fadd float %456, %463 > %468 = fadd float %424, %464 > %469 = fmul float %465, %53 > %470 = fmul float %466, %54 > %471 = fadd float %469, %470 > %472 = fmul float %467, %55 > %473 = fadd float %471, %472 > %474 = fadd float %473, %468 > %475 = fmul float %23, %474 > %476 = fmul float %24, %355 > %477 = fadd float %475, %476 > %478 = fmul float %25, %415 > %479 = fadd float %477, %478 > %480 = fadd float %479, %26 > %481 = fmul float %27, %474 > %482 = fmul float %28, %355 > %483 = fadd float %481, %482 > %484 = fmul float %29, %415 > %485 = fadd float %483, %484 > %486 = fadd float %485, %30 > %487 = fmul float %31, %474 > %488 = fmul float %32, %355 > %489 = fadd float %487, %488 > %490 = fmul float %33, %415 > %491 = fadd float %489, %490 > %492 = fadd float %491, %34 > %493 = fmul float %35, %474 > %494 = fmul float %36, %355 > %495 = fadd float %493, %494 > %496 = fmul float %37, %415 > %497 = fadd float %495, %496 > %498 = fadd float %497, %38 > %499 = fmul float %346, %70 > %500 = fmul float %347, %71 > %501 = fadd float %500, %499 > %502 = fmul float %348, %72 > %503 = fadd float %501, %502 > %504 = fmul float %406, %70 > %505 = fmul float %407, %71 > %506 = fadd float %505, %504 > %507 = fmul float %408, %72 > %508 = fadd float %506, %507 > %509 = fmul float %465, %70 > %510 = fmul float %466, %71 > %511 = fadd float %510, %509 > %512 = fmul float %467, %72 > %513 = fadd float %511, %512 > %514 = fmul float %513, %513 > %515 = fmul float %503, %503 > %516 = fadd float %515, %514 > %517 = fmul float %508, %508 > %518 = fadd float %516, %517 > %519 = call float @llvm.AMDGPU.rsq.clamped.f32(float %518) > %520 = fmul float %519, %513 > %521 = fmul float %519, %503 > %522 = fmul float %519, %508 > %523 = fmul float %39, %520 > %524 = fmul float %40, %521 > %525 = fadd float %524, %523 > %526 = fmul float %41, %522 > %527 = fadd float %525, %526 > %528 = fmul float %346, %76 > %529 = fmul float %347, %77 > %530 = fadd float %529, %528 > %531 = fmul float %348, %78 > %532 = fadd float %530, %531 > %533 = fmul float %346, %59 > %534 = fmul float %347, %60 > %535 = fadd float %534, %533 > %536 = fmul float %348, %61 > %537 = fadd float %535, %536 > %538 = fmul float %406, %76 > %539 = fmul float %407, %77 > %540 = fadd float %539, %538 > %541 = fmul float %408, %78 > %542 = fadd float %540, %541 > %543 = fmul float %406, %59 > %544 = fmul float %407, %60 > %545 = fadd float %544, %543 > %546 = fmul float %408, %61 > %547 = fadd float %545, %546 > %548 = fmul float %465, %76 > %549 = fmul float %466, %77 > %550 = fadd float %549, %548 > %551 = fmul float %467, %78 > %552 = fadd float %550, %551 > %553 = fmul float %465, %59 > %554 = fmul float %466, %60 > %555 = fadd float %554, %553 > %556 = fmul float %467, %61 > %557 = fadd float %555, %556 > %558 = fmul float %552, %552 > %559 = fmul float %532, %532 > %560 = fadd float %559, %558 > %561 = fmul float %542, %542 > %562 = fadd float %560, %561 > %563 = call float 
@llvm.AMDGPU.rsq.clamped.f32(float %562) > %564 = fmul float %563, %552 > %565 = fmul float %563, %532 > %566 = fmul float %563, %542 > %567 = fmul float %39, %564 > %568 = fmul float %40, %565 > %569 = fadd float %568, %567 > %570 = fmul float %41, %566 > %571 = fadd float %569, %570 > %572 = fmul float %557, %557 > %573 = fmul float %537, %537 > %574 = fadd float %573, %572 > %575 = fmul float %547, %547 > %576 = fadd float %574, %575 > %577 = call float @llvm.AMDGPU.rsq.clamped.f32(float %576) > %578 = fmul float %577, %557 > %579 = fmul float %577, %537 > %580 = fmul float %577, %547 > %581 = fmul float %39, %578 > %582 = fmul float %40, %579 > %583 = fadd float %582, %581 > %584 = fmul float %41, %580 > %585 = fadd float %583, %584 > %586 = fmul float %42, %520 > %587 = fmul float %43, %521 > %588 = fadd float %587, %586 > %589 = fmul float %44, %522 > %590 = fadd float %588, %589 > %591 = fmul float %45, %520 > %592 = fmul float %46, %521 > %593 = fadd float %592, %591 > %594 = fmul float %47, %522 > %595 = fadd float %593, %594 > %596 = fmul float %42, %564 > %597 = fmul float %43, %565 > %598 = fadd float %597, %596 > %599 = fmul float %44, %566 > %600 = fadd float %598, %599 > %601 = fmul float %45, %564 > %602 = fmul float %46, %565 > %603 = fadd float %602, %601 > %604 = fmul float %47, %566 > %605 = fadd float %603, %604 > %606 = fmul float %42, %578 > %607 = fmul float %43, %579 > %608 = fadd float %607, %606 > %609 = fmul float %44, %580 > %610 = fadd float %608, %609 > %611 = fmul float %45, %578 > %612 = fmul float %46, %579 > %613 = fadd float %612, %611 > %614 = fmul float %47, %580 > %615 = fadd float %613, %614 > %616 = bitcast i32 %11 to float > %617 = insertvalue <{ float, float, float }> undef, float %616, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %66, float %55, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %527, float %571, float %585, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %590, float %600, float %610, float %349) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %595, float %605, float %615, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %480, float %486, float %492, float %498) > ret <{ float, float, float }> %617 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL 
SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 352, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][22].yyyy > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MOV TEMP[2].xy, IN[0].xyyy > 21: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 22: MUL TEMP[3].x, TEMP[2].zzzz, CONST[1][22].xxxx > 23: MOV TEMP[3].yzw, TEMP[2].xyxw > 24: MOV OUT[0], TEMP[0] > 25: MOV OUT[1], TEMP[1] > 26: MOV OUT[2], TEMP[3] > 27: END >radeonsi: Compiling shader 174 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] 
addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = bitcast float %54 to i32 > %66 = bitcast float %55 to i32 > %67 = insertelement <2 x i32> undef, i32 %65, i32 0 > %68 = insertelement <2 x i32> %67, i32 %66, i32 1 > %69 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %68, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %70 = extractelement <4 x float> %69, i32 1 > %71 = extractelement <4 x float> %69, i32 3 > %72 = call float @llvm.fma.f32(float %70, float 2.000000e+00, float -1.000000e+00) > %73 = call float @llvm.fma.f32(float %71, float 2.000000e+00, float -1.000000e+00) > %74 = fsub float -0.000000e+00, %72 > %75 = call float @llvm.fma.f32(float %74, float %72, float 1.000000e+00) > %76 = fsub float -0.000000e+00, %73 > %77 = call float @llvm.fma.f32(float %76, float %73, float %75) > %78 = call float @llvm.sqrt.f32(float %77) > %79 = fmul float %56, %72 > %80 = fmul float %57, %73 > %81 = fadd float %80, %79 > %82 = fmul float %58, %78 > %83 = fadd float %81, %82 > %84 = fmul float %59, %72 > %85 = fmul float %60, %73 > %86 = fadd float %85, %84 > %87 = fmul float %61, %78 > %88 = fadd float %86, %87 > %89 = fmul float %62, %72 > %90 = fmul float %63, %73 > %91 = fadd float %90, %89 > %92 = fmul float %64, %78 > %93 = fadd float %91, %92 > %94 = fmul float %83, %83 > %95 = fmul float %88, %88 > %96 = fadd float %95, %94 > %97 = fmul float %93, %93 > %98 = fadd float %96, %97 > %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) > %100 = fmul float %99, %83 > %101 = fmul float %99, %88 > %102 = fmul float %99, %93 > %103 = call float @llvm.fma.f32(float %100, float 5.000000e-01, float 5.000000e-01) > %104 = call float @llvm.fma.f32(float %101, float 5.000000e-01, float 5.000000e-01) > %105 = call float @llvm.fma.f32(float %102, float 5.000000e-01, float 5.000000e-01) > %106 = bitcast float %54 to i32 > %107 = bitcast float %55 to i32 > %108 = insertelement <2 x i32> undef, i32 %106, i32 0 > %109 = insertelement <2 x i32> %108, i32 %107, i32 1 > %110 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %109, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %111 = extractelement <4 x float> %110, i32 0 > %112 = extractelement <4 x float> %110, i32 1 > %113 = extractelement <4 x float> %110, i32 2 > %114 = extractelement <4 x float> %110, i32 3 > %115 = bitcast float 
%54 to i32 > %116 = bitcast float %55 to i32 > %117 = insertelement <2 x i32> undef, i32 %115, i32 0 > %118 = insertelement <2 x i32> %117, i32 %116, i32 1 > %119 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %118, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %120 = extractelement <4 x float> %119, i32 0 > %121 = extractelement <4 x float> %119, i32 1 > %122 = extractelement <4 x float> %119, i32 2 > %123 = extractelement <4 x float> %119, i32 3 > %124 = fmul float %122, %25 > %125 = bitcast float %5 to i32 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %125, 10 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float %103, 11 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %104, 12 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %105, 13 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %26, 14 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %111, 15 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %112, 16 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %113, 17 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %114, 18 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %124, 19 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %121, 20 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %120, 21 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %123, 22 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139 >} > >; Function 
Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL CONST[1][0..45] >DCL CONST[2][0..4095] >DCL TEMP[0..22], LOCAL >DCL ADDR[0] >IMM[0] UINT32 {0, 624, 720, 608} >IMM[1] UINT32 {640, 1, 16, 400} >IMM[2] FLT32 { 255.0020, 2.0000, 1.0000, 0.5000} >IMM[3] INT32 {1, 2, 4, 0} >IMM[4] UINT32 {320, 496, 512, 528} >IMM[5] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1667, 0.2500} >IMM[6] UINT32 {480, 576, 304, 544} >IMM[7] UINT32 {560, 592, 0, 0} > 0: MUL TEMP[0].xyz, CONST[1][39].xyzz, CONST[1][45].yyyy > 1: FMA TEMP[0].xyz, CONST[1][45].xxxx, CONST[1][38].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, CONST[1][45].zzzz, CONST[1][40].xyzz, TEMP[0].xyzz > 3: MUL TEMP[2].xyz, IN[4].zyxx, IMM[2].xxxx > 4: F2I TEMP[3].xyz, TEMP[2].xyzz > 5: SHL TEMP[4].xyz, TEMP[3].xyzz, IMM[3].xxxx > 6: UMAD TEMP[5].xyz, TEMP[3].xyzz, IMM[3].yyyy, IMM[3].xxxx > 7: UMUL TEMP[6].x, TEMP[4].xxxx, IMM[1].zzzz > 8: USHR TEMP[7].x, TEMP[6].xxxx, IMM[3].zzzz > 9: UARL ADDR[0].x, TEMP[7].xxxx > 10: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 11: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].yyyy > 12: MOV TEMP[6].w, TEMP[6].xxxx > 13: UMUL TEMP[7].x, TEMP[4].yyyy, IMM[1].zzzz > 14: USHR TEMP[8].x, TEMP[7].xxxx, IMM[3].zzzz > 15: UARL ADDR[0].x, TEMP[8].xxxx > 16: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 17: MUL TEMP[7].x, IN[3].yyyy, TEMP[7].yyyy > 18: MOV TEMP[7].w, TEMP[7].xxxx > 19: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz > 20: USHR TEMP[9].x, TEMP[8].xxxx, IMM[3].zzzz > 21: UARL ADDR[0].x, TEMP[9].xxxx > 22: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 23: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 24: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 25: UARL ADDR[0].x, TEMP[10].xxxx > 26: MOV TEMP[9].w, CONST[2][ADDR[0].x] > 27: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].wwww > 28: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 29: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 30: UARL ADDR[0].x, TEMP[10].xxxx > 31: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 32: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 33: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 34: UARL ADDR[0].x, TEMP[11].xxxx > 35: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 36: FMA TEMP[9].x, TEMP[9].yyyy, TEMP[10].zzzz, -TEMP[8].xxxx > 37: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 38: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 39: UARL ADDR[0].x, TEMP[11].xxxx > 40: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 41: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 42: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 43: UARL ADDR[0].x, TEMP[12].xxxx > 44: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 45: FMA TEMP[8].x, TEMP[10].yyyy, TEMP[11].zzzz, TEMP[8].xxxx > 46: MUL TEMP[8].x, 
TEMP[8].xxxx, IN[3].xxxx > 47: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 48: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].yyyy > 49: MOV TEMP[6].z, TEMP[9].xxxx > 50: UMUL TEMP[9].x, TEMP[5].yyyy, IMM[1].zzzz > 51: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 52: UARL ADDR[0].x, TEMP[10].xxxx > 53: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 54: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 55: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 56: UARL ADDR[0].x, TEMP[11].xxxx > 57: MOV TEMP[10].w, CONST[2][ADDR[0].x] > 58: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].wwww > 59: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 60: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 61: UARL ADDR[0].x, TEMP[11].xxxx > 62: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 63: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 64: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 65: UARL ADDR[0].x, TEMP[12].xxxx > 66: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 67: FMA TEMP[10].x, TEMP[10].yyyy, TEMP[11].zzzz, -TEMP[9].xxxx > 68: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 69: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 70: UARL ADDR[0].x, TEMP[12].xxxx > 71: MOV TEMP[11].y, CONST[2][ADDR[0].x] > 72: UMUL TEMP[12].x, TEMP[5].yyyy, IMM[1].zzzz > 73: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 74: UARL ADDR[0].x, TEMP[13].xxxx > 75: MOV TEMP[12].z, CONST[2][ADDR[0].x] > 76: FMA TEMP[9].x, TEMP[11].yyyy, TEMP[12].zzzz, TEMP[9].xxxx > 77: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].yyyy > 78: MUL TEMP[9].x, IMM[2].yyyy, TEMP[9].xxxx > 79: MOV TEMP[9].y, TEMP[9].xxxx > 80: MUL TEMP[10].x, TEMP[10].xxxx, IN[3].yyyy > 81: MUL TEMP[10].x, IMM[2].yyyy, TEMP[10].xxxx > 82: MOV TEMP[7].z, TEMP[10].xxxx > 83: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 84: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 85: UARL ADDR[0].x, TEMP[11].xxxx > 86: MOV TEMP[10].yz, CONST[2][ADDR[0].x] > 87: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 88: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 89: UARL ADDR[0].x, TEMP[12].xxxx > 90: MOV TEMP[11].xw, CONST[2][ADDR[0].x] > 91: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww > 92: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 93: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 94: UARL ADDR[0].x, TEMP[12].xxxx > 95: MOV TEMP[11].x, CONST[2][ADDR[0].x] > 96: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz > 97: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 98: UARL ADDR[0].x, TEMP[13].xxxx > 99: MOV TEMP[12].y, CONST[2][ADDR[0].x] >100: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >101: MUL TEMP[11].x, TEMP[11].xxxx, IN[3].xxxx >102: MUL TEMP[6].x, IMM[2].yyyy, TEMP[11].xxxx >103: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz >104: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz >105: UARL ADDR[0].x, TEMP[12].xxxx >106: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >107: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >108: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz >109: UARL ADDR[0].x, TEMP[13].xxxx >110: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >111: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >112: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >113: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[2].yyyy, IMM[2].zzzz >114: MUL TEMP[13].x, IN[3].xxxx, TEMP[12].yyyy >115: MOV TEMP[6].y, TEMP[13].xxxx >116: UMUL TEMP[13].x, TEMP[5].yyyy, IMM[1].zzzz >117: USHR TEMP[14].x, TEMP[13].xxxx, IMM[3].zzzz >118: UARL ADDR[0].x, TEMP[14].xxxx >119: MOV TEMP[13].yz, CONST[2][ADDR[0].x] >120: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >121: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >122: UARL ADDR[0].x, TEMP[15].xxxx >123: MOV TEMP[14].xw, CONST[2][ADDR[0].x] >124: MUL 
TEMP[13].xyz, TEMP[13].zzyy, TEMP[14].wxww >125: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >126: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >127: UARL ADDR[0].x, TEMP[15].xxxx >128: MOV TEMP[14].x, CONST[2][ADDR[0].x] >129: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >130: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >131: UARL ADDR[0].x, TEMP[16].xxxx >132: MOV TEMP[15].y, CONST[2][ADDR[0].x] >133: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].yyyy, TEMP[13].xxxx >134: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].yyyy >135: MUL TEMP[7].x, IMM[2].yyyy, TEMP[14].xxxx >136: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >137: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >138: UARL ADDR[0].x, TEMP[15].xxxx >139: MOV TEMP[14].xyz, CONST[2][ADDR[0].x] >140: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >141: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >142: UARL ADDR[0].x, TEMP[16].xxxx >143: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >144: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >145: ADD TEMP[14].xyz, TEMP[14].zzyy, TEMP[14].yxxx >146: FMA TEMP[15].xyz, -TEMP[14].xyzz, IMM[2].yyyy, IMM[2].zzzz >147: MUL TEMP[16].x, IN[3].yyyy, TEMP[15].yyyy >148: MOV TEMP[7].y, TEMP[16].xxxx >149: ADD TEMP[6], TEMP[6], TEMP[7] >150: UMUL TEMP[16].x, TEMP[4].zzzz, IMM[1].zzzz >151: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >152: UARL ADDR[0].x, TEMP[17].xxxx >153: MOV TEMP[16].y, CONST[2][ADDR[0].x] >154: MUL TEMP[16].x, IN[3].zzzz, TEMP[16].yyyy >155: MOV TEMP[7].w, TEMP[16].xxxx >156: UMUL TEMP[16].x, TEMP[5].zzzz, IMM[1].zzzz >157: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >158: UARL ADDR[0].x, TEMP[17].xxxx >159: MOV TEMP[16].x, CONST[2][ADDR[0].x] >160: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >161: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >162: UARL ADDR[0].x, TEMP[18].xxxx >163: MOV TEMP[17].w, CONST[2][ADDR[0].x] >164: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[17].wwww >165: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >166: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >167: UARL ADDR[0].x, TEMP[18].xxxx >168: MOV TEMP[17].y, CONST[2][ADDR[0].x] >169: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >170: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >171: UARL ADDR[0].x, TEMP[19].xxxx >172: MOV TEMP[18].z, CONST[2][ADDR[0].x] >173: FMA TEMP[17].x, TEMP[17].yyyy, TEMP[18].zzzz, -TEMP[16].xxxx >174: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >175: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >176: UARL ADDR[0].x, TEMP[19].xxxx >177: MOV TEMP[18].y, CONST[2][ADDR[0].x] >178: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >179: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >180: UARL ADDR[0].x, TEMP[20].xxxx >181: MOV TEMP[19].z, CONST[2][ADDR[0].x] >182: FMA TEMP[16].x, TEMP[18].yyyy, TEMP[19].zzzz, TEMP[16].xxxx >183: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >184: MUL TEMP[16].x, IMM[2].yyyy, TEMP[16].xxxx >185: MOV TEMP[16].y, TEMP[16].xxxx >186: MUL TEMP[17].x, TEMP[17].xxxx, IN[3].zzzz >187: MUL TEMP[17].x, IMM[2].yyyy, TEMP[17].xxxx >188: MOV TEMP[7].z, TEMP[17].xxxx >189: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >190: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >191: UARL ADDR[0].x, TEMP[18].xxxx >192: MOV TEMP[17].yz, CONST[2][ADDR[0].x] >193: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >194: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >195: UARL ADDR[0].x, TEMP[19].xxxx >196: MOV TEMP[18].xw, CONST[2][ADDR[0].x] >197: MUL TEMP[17].xyz, TEMP[17].zzyy, TEMP[18].wxww >198: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >199: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >200: UARL ADDR[0].x, TEMP[19].xxxx >201: MOV TEMP[18].x, 
CONST[2][ADDR[0].x] >202: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >203: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >204: UARL ADDR[0].x, TEMP[20].xxxx >205: MOV TEMP[19].y, CONST[2][ADDR[0].x] >206: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].yyyy, TEMP[17].xxxx >207: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].zzzz >208: MUL TEMP[7].x, IMM[2].yyyy, TEMP[18].xxxx >209: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >210: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >211: UARL ADDR[0].x, TEMP[19].xxxx >212: MOV TEMP[18].xyz, CONST[2][ADDR[0].x] >213: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >214: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >215: UARL ADDR[0].x, TEMP[20].xxxx >216: MOV TEMP[19].xyz, CONST[2][ADDR[0].x] >217: MUL TEMP[18].xyz, TEMP[18].xyzz, TEMP[19].xyzz >218: ADD TEMP[18].xyz, TEMP[18].zzyy, TEMP[18].yxxx >219: FMA TEMP[19].xyz, -TEMP[18].xyzz, IMM[2].yyyy, IMM[2].zzzz >220: MUL TEMP[20].x, IN[3].zzzz, TEMP[19].yyyy >221: MOV TEMP[7].y, TEMP[20].xxxx >222: ADD TEMP[6], TEMP[6], TEMP[7] >223: DP3 TEMP[20].x, TEMP[6].xyzz, IN[1].xyzz >224: MOV TEMP[7].y, TEMP[20].xxxx >225: UMUL TEMP[20].x, TEMP[5].xxxx, IMM[1].zzzz >226: USHR TEMP[21].x, TEMP[20].xxxx, IMM[3].zzzz >227: UARL ADDR[0].x, TEMP[21].xxxx >228: MOV TEMP[20].x, CONST[2][ADDR[0].x] >229: UMUL TEMP[21].x, TEMP[5].xxxx, IMM[1].zzzz >230: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >231: UARL ADDR[0].x, TEMP[22].xxxx >232: MOV TEMP[21].z, CONST[2][ADDR[0].x] >233: FMA TEMP[20].x, TEMP[20].xxxx, TEMP[21].zzzz, -TEMP[10].zzzz >234: MUL TEMP[20].x, TEMP[20].xxxx, IN[3].xxxx >235: MUL TEMP[20].x, IMM[2].yyyy, TEMP[20].xxxx >236: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].yyyy >237: MOV TEMP[20].y, TEMP[8].xxxx >238: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >239: USHR TEMP[21].x, TEMP[8].xxxx, IMM[3].zzzz >240: UARL ADDR[0].x, TEMP[21].xxxx >241: MOV TEMP[8].x, CONST[2][ADDR[0].x] >242: UMUL TEMP[21].x, TEMP[5].yyyy, IMM[1].zzzz >243: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >244: UARL ADDR[0].x, TEMP[22].xxxx >245: MOV TEMP[21].z, CONST[2][ADDR[0].x] >246: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[21].zzzz, -TEMP[13].zzzz >247: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy >248: MUL TEMP[9].x, IMM[2].yyyy, TEMP[8].xxxx >249: MUL TEMP[8].x, IN[3].xxxx, TEMP[12].zzzz >250: MOV TEMP[20].z, TEMP[8].xxxx >251: MUL TEMP[11].x, IN[3].xxxx, TEMP[12].xxxx >252: MUL TEMP[8].x, IN[3].yyyy, TEMP[15].zzzz >253: MOV TEMP[9].z, TEMP[8].xxxx >254: MUL TEMP[14].x, IN[3].yyyy, TEMP[15].xxxx >255: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >256: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >257: UARL ADDR[0].x, TEMP[12].xxxx >258: MOV TEMP[8].z, CONST[2][ADDR[0].x] >259: MUL TEMP[8].x, IN[3].xxxx, TEMP[8].zzzz >260: MOV TEMP[20].w, TEMP[8].xxxx >261: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >262: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >263: UARL ADDR[0].x, TEMP[12].xxxx >264: MOV TEMP[8].z, CONST[2][ADDR[0].x] >265: MUL TEMP[8].x, IN[3].yyyy, TEMP[8].zzzz >266: MOV TEMP[9].w, TEMP[8].xxxx >267: ADD TEMP[9], TEMP[9], TEMP[20] >268: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >269: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >270: UARL ADDR[0].x, TEMP[12].xxxx >271: MOV TEMP[8].x, CONST[2][ADDR[0].x] >272: UMUL TEMP[12].x, TEMP[5].zzzz, IMM[1].zzzz >273: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >274: UARL ADDR[0].x, TEMP[15].xxxx >275: MOV TEMP[12].z, CONST[2][ADDR[0].x] >276: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].zzzz, -TEMP[17].zzzz >277: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].zzzz >278: MUL TEMP[16].x, IMM[2].yyyy, TEMP[8].xxxx >279: MUL TEMP[8].x, IN[3].zzzz, 
TEMP[19].zzzz >280: MOV TEMP[16].z, TEMP[8].xxxx >281: MUL TEMP[18].x, IN[3].zzzz, TEMP[19].xxxx >282: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >283: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >284: UARL ADDR[0].x, TEMP[12].xxxx >285: MOV TEMP[8].z, CONST[2][ADDR[0].x] >286: MUL TEMP[8].x, IN[3].zzzz, TEMP[8].zzzz >287: MOV TEMP[16].w, TEMP[8].xxxx >288: ADD TEMP[9], TEMP[9], TEMP[16] >289: DP3 TEMP[8].x, TEMP[9].xyzz, IN[1].xyzz >290: MOV TEMP[7].z, TEMP[8].xxxx >291: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >292: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >293: UARL ADDR[0].x, TEMP[12].xxxx >294: MOV TEMP[8].x, CONST[2][ADDR[0].x] >295: MUL TEMP[8].x, IN[3].xxxx, TEMP[8].xxxx >296: MOV TEMP[11].w, TEMP[8].xxxx >297: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >298: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >299: UARL ADDR[0].x, TEMP[12].xxxx >300: MOV TEMP[8].x, CONST[2][ADDR[0].x] >301: MUL TEMP[8].x, IN[3].yyyy, TEMP[8].xxxx >302: MOV TEMP[14].w, TEMP[8].xxxx >303: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >304: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >305: UARL ADDR[0].x, TEMP[12].xxxx >306: MOV TEMP[8].x, CONST[2][ADDR[0].x] >307: MUL TEMP[8].x, IN[3].zzzz, TEMP[8].xxxx >308: MOV TEMP[18].w, TEMP[8].xxxx >309: ADD TEMP[2].x, TEMP[10].zzzz, TEMP[10].yyyy >310: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].xxxx >311: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >312: MOV TEMP[11].z, TEMP[8].xxxx >313: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz >314: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >315: UARL ADDR[0].x, TEMP[12].xxxx >316: MOV TEMP[8].x, CONST[2][ADDR[0].x] >317: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >318: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >319: UARL ADDR[0].x, TEMP[15].xxxx >320: MOV TEMP[12].y, CONST[2][ADDR[0].x] >321: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].yyyy, -TEMP[10].xxxx >322: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].xxxx >323: MUL TEMP[8].x, IMM[2].yyyy, TEMP[8].xxxx >324: MOV TEMP[11].y, TEMP[8].xxxx >325: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >326: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >327: UARL ADDR[0].x, TEMP[10].xxxx >328: MOV TEMP[8].x, CONST[2][ADDR[0].x] >329: UMUL TEMP[10].x, TEMP[5].zzzz, IMM[1].zzzz >330: USHR TEMP[12].x, TEMP[10].xxxx, IMM[3].zzzz >331: UARL ADDR[0].x, TEMP[12].xxxx >332: MOV TEMP[10].y, CONST[2][ADDR[0].x] >333: FMA TEMP[2].x, TEMP[8].xxxx, TEMP[10].yyyy, -TEMP[17].xxxx >334: ADD TEMP[8].x, TEMP[17].zzzz, TEMP[17].yyyy >335: MOV TEMP[2].y, TEMP[8].xxxx >336: MUL TEMP[2].xy, TEMP[2].xyyy, IN[3].zzzz >337: MUL TEMP[8].xy, IMM[2].yyyy, TEMP[2].xyyy >338: MOV TEMP[18].yz, TEMP[8].yxyy >339: ADD TEMP[2].x, TEMP[13].zzzz, TEMP[13].yyyy >340: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >341: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >342: MOV TEMP[14].z, TEMP[8].xxxx >343: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >344: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >345: UARL ADDR[0].x, TEMP[10].xxxx >346: MOV TEMP[8].x, CONST[2][ADDR[0].x] >347: UMUL TEMP[5].x, TEMP[5].yyyy, IMM[1].zzzz >348: USHR TEMP[10].x, TEMP[5].xxxx, IMM[3].zzzz >349: UARL ADDR[0].x, TEMP[10].xxxx >350: MOV TEMP[5].y, CONST[2][ADDR[0].x] >351: FMA TEMP[5].x, TEMP[8].xxxx, TEMP[5].yyyy, -TEMP[13].xxxx >352: MUL TEMP[5].x, TEMP[5].xxxx, IN[3].yyyy >353: MUL TEMP[5].x, IMM[2].yyyy, TEMP[5].xxxx >354: MOV TEMP[14].y, TEMP[5].xxxx >355: ADD TEMP[2], TEMP[11], TEMP[14] >356: ADD TEMP[2], TEMP[18], TEMP[2] >357: DP3 TEMP[7].x, TEMP[2].xyzz, IN[1].xyzz >358: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[7].xyzz >359: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz >360: MUL TEMP[5].x, 
TEMP[5].xxxx, IN[2].xxxx >361: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][25].yyyy >362: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww >363: MOV TEMP[3].xyz, IN[0].xyzx >364: MOV TEMP[3].w, IMM[2].zzzz >365: DP4 TEMP[6].x, TEMP[6], TEMP[3] >366: MOV TEMP[4].y, TEMP[6].xxxx >367: DP4 TEMP[6].x, TEMP[9], TEMP[3] >368: MOV TEMP[4].z, TEMP[6].xxxx >369: DP4 TEMP[4].x, TEMP[2], TEMP[3] >370: DP3 TEMP[6].x, IMM[2].zzzz, TEMP[4].xyzz >371: DP3 TEMP[7].x, CONST[1][38].xyzz, CONST[1][38].xyzz >372: SQRT TEMP[7].x, TEMP[7].xxxx >373: MUL TEMP[8].x, TEMP[7].xxxx, CONST[1][20].wwww >374: FSNE TEMP[9].x, TEMP[8].xxxx, IMM[5].xxxx >375: UIF TEMP[9].xxxx :0 >376: RCP TEMP[8].x, TEMP[8].xxxx >377: MUL TEMP[8].x, TEMP[6].xxxx, TEMP[8].xxxx >378: ELSE :0 >379: SSG TEMP[6].x, TEMP[6].xxxx >380: MUL TEMP[8].x, IMM[5].yyyy, TEMP[6].xxxx >381: ENDIF >382: ADD TEMP[3], -CONST[1][31], CONST[1][32] >383: MUL TEMP[6].x, CONST[1][20].zzzz, CONST[1][45].wwww >384: MUL TEMP[9].x, TEMP[6].xxxx, IMM[5].zzzz >385: MOV_SAT TEMP[9].x, TEMP[9].xxxx >386: FMA TEMP[9], TEMP[9].xxxx, TEMP[3], CONST[1][31] >387: FMA TEMP[8].xy, CONST[1][33].xxxx, TEMP[9].ywww, TEMP[8].xxxx >388: MUL TEMP[3].xy, TEMP[7].xxxx, TEMP[9].xzzz >389: FMA TEMP[7].xy, -IN[2].yzzz, CONST[1][30].xyyy, IMM[2].zzzz >390: ADD TEMP[2].xy, TEMP[8].xyyy, TEMP[7].xyyy >391: SIN TEMP[7].x, TEMP[2].xxxx >392: SIN TEMP[7].y, TEMP[2].yyyy >393: MUL TEMP[2].xy, TEMP[3].xyyy, TEMP[7].xyyy >394: FMA TEMP[2].x, TEMP[6].xxxx, CONST[1][20].yyyy, TEMP[2].xxxx >395: FMA TEMP[3].x, TEMP[2].yyyy, IMM[5].wwww, TEMP[2].xxxx >396: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[3].xxxx >397: FMA TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xxxx, TEMP[4].xyzz >398: DP3 TEMP[3].x, TEMP[4].xyzz, TEMP[4].xyzz >399: SQRT TEMP[3].x, TEMP[3].xxxx >400: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >401: RSQ TEMP[2].x, TEMP[2].xxxx >402: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xxxx >403: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[0].xyzz >404: MOV TEMP[0].w, IMM[2].zzzz >405: DP4 TEMP[1].x, CONST[1][36], TEMP[0] >406: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][19].yyyy >407: MOV TEMP[1].z, TEMP[1].xxxx >408: DP4 TEMP[1].x, CONST[1][34], TEMP[0] >409: DP4 TEMP[2].x, CONST[1][35], TEMP[0] >410: MOV TEMP[1].y, TEMP[2].xxxx >411: DP4 TEMP[0].x, CONST[1][37], TEMP[0] >412: MOV TEMP[1].w, TEMP[0].xxxx >413: MOV OUT[0], TEMP[1] >414: END >radeonsi: Compiling shader 175 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 496) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 
500) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 524) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 544) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 548) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 552) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 556) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 560) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 564) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 568) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 572) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 576) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 580) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 584) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 588) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 592) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 596) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 600) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 604) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 608) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 612) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 616) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 624) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 628) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 632) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 640) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 644) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 648) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 720) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 724) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 728) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 732) > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %13) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %14) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %15) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> 
%81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %16) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = extractelement <4 x float> %87, i32 2 > %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 > %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %17) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = fmul float %55, %62 > %98 = fmul float %56, %62 > %99 = fmul float %57, %62 > %100 = call float @llvm.fma.f32(float %61, float %52, float %97) > %101 = call float @llvm.fma.f32(float %61, float %53, float %98) > %102 = call float @llvm.fma.f32(float %61, float %54, float %99) > %103 = call float @llvm.fma.f32(float %63, float %58, float %100) > %104 = call float @llvm.fma.f32(float %63, float %59, float %101) > %105 = call float @llvm.fma.f32(float %63, float %60, float %102) > %106 = fmul float %96, 0x406FE01000000000 > %107 = fmul float %95, 0x406FE01000000000 > %108 = fmul float %94, 0x406FE01000000000 > %109 = fptosi float %106 to i32 > %110 = fptosi float %107 to i32 > %111 = fptosi float %108 to i32 > %112 = shl i32 %109, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %110, 1 > %115 = or i32 %114, 1 > %116 = shl i32 %111, 1 > %117 = or i32 %116, 1 > %118 = shl i32 %109, 5 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %119) > %121 = fmul float %88, %120 > %122 = shl i32 %110, 5 > %123 = or i32 %122, 4 > %124 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %123) > %125 = fmul float %89, %124 > %126 = shl i32 %113, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %126) > %128 = shl i32 %113, 4 > %129 = or i32 %128, 12 > %130 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %129) > %131 = fmul float %127, %130 > %132 = shl i32 %113, 4 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %133) > %135 = shl i32 %113, 4 > %136 = or i32 %135, 8 > %137 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %136) > %138 = fsub float -0.000000e+00, %131 > %139 = call float @llvm.fma.f32(float %134, float %137, float %138) > %140 = shl i32 %113, 4 > %141 = or i32 %140, 4 > %142 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %141) > %143 = shl i32 %113, 4 > %144 = or i32 %143, 8 > %145 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %144) > %146 = call float @llvm.fma.f32(float %142, float %145, float %131) > %147 = fmul float %146, %88 > %148 = fmul float %139, %88 > %149 = fmul float %148, 2.000000e+00 > %150 = shl i32 %115, 4 > %151 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %150) > %152 = shl i32 %115, 4 > %153 = or i32 %152, 12 > %154 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %153) > %155 = fmul float %151, %154 > %156 = shl i32 %115, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %157) > %159 = shl i32 %115, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %160) > %162 = fsub float -0.000000e+00, %155 > %163 = call float @llvm.fma.f32(float %158, float %161, float %162) > %164 = shl i32 %115, 4 > %165 = or i32 %164, 4 > %166 = call float 
@llvm.SI.load.const(<16 x i8> %66, i32 %165) > %167 = shl i32 %115, 4 > %168 = or i32 %167, 8 > %169 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %168) > %170 = call float @llvm.fma.f32(float %166, float %169, float %155) > %171 = fmul float %170, %89 > %172 = fmul float %171, 2.000000e+00 > %173 = fmul float %163, %89 > %174 = fmul float %173, 2.000000e+00 > %175 = shl i32 %113, 4 > %176 = or i32 %175, 4 > %177 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %176) > %178 = shl i32 %113, 4 > %179 = or i32 %178, 8 > %180 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %179) > %181 = shl i32 %113, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %181) > %183 = shl i32 %113, 4 > %184 = or i32 %183, 12 > %185 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %184) > %186 = fmul float %180, %185 > %187 = fmul float %180, %182 > %188 = fmul float %177, %185 > %189 = shl i32 %113, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %189) > %191 = shl i32 %113, 4 > %192 = or i32 %191, 4 > %193 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %192) > %194 = call float @llvm.fma.f32(float %190, float %193, float %186) > %195 = fmul float %194, %88 > %196 = fmul float %195, 2.000000e+00 > %197 = shl i32 %113, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %197) > %199 = shl i32 %113, 4 > %200 = or i32 %199, 4 > %201 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %200) > %202 = shl i32 %113, 4 > %203 = or i32 %202, 8 > %204 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %203) > %205 = shl i32 %113, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %205) > %207 = shl i32 %113, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %208) > %210 = shl i32 %113, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %211) > %213 = fmul float %198, %206 > %214 = fmul float %201, %209 > %215 = fmul float %204, %212 > %216 = fadd float %215, %214 > %217 = fadd float %215, %213 > %218 = fadd float %214, %213 > %219 = fsub float -0.000000e+00, %216 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fsub float -0.000000e+00, %217 > %222 = call float @llvm.fma.f32(float %221, float 2.000000e+00, float 1.000000e+00) > %223 = fsub float -0.000000e+00, %218 > %224 = call float @llvm.fma.f32(float %223, float 2.000000e+00, float 1.000000e+00) > %225 = fmul float %88, %222 > %226 = shl i32 %115, 4 > %227 = or i32 %226, 4 > %228 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %227) > %229 = shl i32 %115, 4 > %230 = or i32 %229, 8 > %231 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %230) > %232 = shl i32 %115, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %232) > %234 = shl i32 %115, 4 > %235 = or i32 %234, 12 > %236 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %235) > %237 = fmul float %231, %236 > %238 = fmul float %231, %233 > %239 = fmul float %228, %236 > %240 = shl i32 %115, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %240) > %242 = shl i32 %115, 4 > %243 = or i32 %242, 4 > %244 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %243) > %245 = call float @llvm.fma.f32(float %241, float %244, float %237) > %246 = fmul float %245, %89 > %247 = fmul float %246, 2.000000e+00 > %248 = shl i32 %115, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %248) > %250 = shl i32 %115, 4 > %251 = or i32 %250, 4 > %252 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %251) > %253 = shl 
i32 %115, 4 > %254 = or i32 %253, 8 > %255 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %254) > %256 = shl i32 %115, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %256) > %258 = shl i32 %115, 4 > %259 = or i32 %258, 4 > %260 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %259) > %261 = shl i32 %115, 4 > %262 = or i32 %261, 8 > %263 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %262) > %264 = fmul float %249, %257 > %265 = fmul float %252, %260 > %266 = fmul float %255, %263 > %267 = fadd float %266, %265 > %268 = fadd float %266, %264 > %269 = fadd float %265, %264 > %270 = fsub float -0.000000e+00, %267 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fsub float -0.000000e+00, %268 > %273 = call float @llvm.fma.f32(float %272, float 2.000000e+00, float 1.000000e+00) > %274 = fsub float -0.000000e+00, %269 > %275 = call float @llvm.fma.f32(float %274, float 2.000000e+00, float 1.000000e+00) > %276 = fmul float %89, %273 > %277 = fadd float %196, %247 > %278 = fadd float %225, %276 > %279 = fadd float %149, %174 > %280 = fadd float %121, %125 > %281 = shl i32 %111, 5 > %282 = or i32 %281, 4 > %283 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %282) > %284 = fmul float %90, %283 > %285 = shl i32 %117, 4 > %286 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %285) > %287 = shl i32 %117, 4 > %288 = or i32 %287, 12 > %289 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %288) > %290 = fmul float %286, %289 > %291 = shl i32 %117, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %292) > %294 = shl i32 %117, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %295) > %297 = fsub float -0.000000e+00, %290 > %298 = call float @llvm.fma.f32(float %293, float %296, float %297) > %299 = shl i32 %117, 4 > %300 = or i32 %299, 4 > %301 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %300) > %302 = shl i32 %117, 4 > %303 = or i32 %302, 8 > %304 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %303) > %305 = call float @llvm.fma.f32(float %301, float %304, float %290) > %306 = fmul float %305, %90 > %307 = fmul float %306, 2.000000e+00 > %308 = fmul float %298, %90 > %309 = fmul float %308, 2.000000e+00 > %310 = shl i32 %117, 4 > %311 = or i32 %310, 4 > %312 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %311) > %313 = shl i32 %117, 4 > %314 = or i32 %313, 8 > %315 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %314) > %316 = shl i32 %117, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %316) > %318 = shl i32 %117, 4 > %319 = or i32 %318, 12 > %320 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %319) > %321 = fmul float %315, %320 > %322 = fmul float %315, %317 > %323 = fmul float %312, %320 > %324 = shl i32 %117, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %324) > %326 = shl i32 %117, 4 > %327 = or i32 %326, 4 > %328 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %327) > %329 = call float @llvm.fma.f32(float %325, float %328, float %321) > %330 = fmul float %329, %90 > %331 = fmul float %330, 2.000000e+00 > %332 = shl i32 %117, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %332) > %334 = shl i32 %117, 4 > %335 = or i32 %334, 4 > %336 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %335) > %337 = shl i32 %117, 4 > %338 = or i32 %337, 8 > %339 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %338) > %340 = shl i32 %117, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> 
%66, i32 %340) > %342 = shl i32 %117, 4 > %343 = or i32 %342, 4 > %344 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %343) > %345 = shl i32 %117, 4 > %346 = or i32 %345, 8 > %347 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %346) > %348 = fmul float %333, %341 > %349 = fmul float %336, %344 > %350 = fmul float %339, %347 > %351 = fadd float %350, %349 > %352 = fadd float %350, %348 > %353 = fadd float %349, %348 > %354 = fsub float -0.000000e+00, %351 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fsub float -0.000000e+00, %352 > %357 = call float @llvm.fma.f32(float %356, float 2.000000e+00, float 1.000000e+00) > %358 = fsub float -0.000000e+00, %353 > %359 = call float @llvm.fma.f32(float %358, float 2.000000e+00, float 1.000000e+00) > %360 = fmul float %90, %357 > %361 = fadd float %277, %331 > %362 = fadd float %278, %360 > %363 = fadd float %279, %309 > %364 = fadd float %280, %284 > %365 = fmul float %361, %76 > %366 = fmul float %362, %77 > %367 = fadd float %366, %365 > %368 = fmul float %363, %78 > %369 = fadd float %367, %368 > %370 = shl i32 %113, 4 > %371 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %370) > %372 = shl i32 %113, 4 > %373 = or i32 %372, 8 > %374 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %373) > %375 = fsub float -0.000000e+00, %188 > %376 = call float @llvm.fma.f32(float %371, float %374, float %375) > %377 = fmul float %376, %88 > %378 = fmul float %377, 2.000000e+00 > %379 = fmul float %147, 2.000000e+00 > %380 = shl i32 %115, 4 > %381 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %380) > %382 = shl i32 %115, 4 > %383 = or i32 %382, 8 > %384 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %383) > %385 = fsub float -0.000000e+00, %239 > %386 = call float @llvm.fma.f32(float %381, float %384, float %385) > %387 = fmul float %386, %89 > %388 = fmul float %387, 2.000000e+00 > %389 = fmul float %88, %224 > %390 = fmul float %88, %220 > %391 = fmul float %89, %275 > %392 = fmul float %89, %271 > %393 = shl i32 %109, 5 > %394 = or i32 %393, 8 > %395 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %394) > %396 = fmul float %88, %395 > %397 = shl i32 %110, 5 > %398 = or i32 %397, 8 > %399 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %398) > %400 = fmul float %89, %399 > %401 = fadd float %388, %378 > %402 = fadd float %172, %379 > %403 = fadd float %391, %389 > %404 = fadd float %400, %396 > %405 = shl i32 %117, 4 > %406 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %405) > %407 = shl i32 %117, 4 > %408 = or i32 %407, 8 > %409 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %408) > %410 = fsub float -0.000000e+00, %323 > %411 = call float @llvm.fma.f32(float %406, float %409, float %410) > %412 = fmul float %411, %90 > %413 = fmul float %412, 2.000000e+00 > %414 = fmul float %90, %359 > %415 = fmul float %90, %355 > %416 = shl i32 %111, 5 > %417 = or i32 %416, 8 > %418 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %417) > %419 = fmul float %90, %418 > %420 = fadd float %401, %413 > %421 = fadd float %402, %307 > %422 = fadd float %403, %414 > %423 = fadd float %404, %419 > %424 = fmul float %420, %76 > %425 = fmul float %421, %77 > %426 = fadd float %425, %424 > %427 = fmul float %422, %78 > %428 = fadd float %426, %427 > %429 = shl i32 %109, 5 > %430 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %429) > %431 = fmul float %88, %430 > %432 = shl i32 %110, 5 > %433 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %432) > %434 = fmul float %89, %433 > 
%435 = shl i32 %111, 5 > %436 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %435) > %437 = fmul float %90, %436 > %438 = fadd float %188, %187 > %439 = fmul float %438, %88 > %440 = fmul float %439, 2.000000e+00 > %441 = shl i32 %113, 4 > %442 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %441) > %443 = shl i32 %113, 4 > %444 = or i32 %443, 4 > %445 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %444) > %446 = fsub float -0.000000e+00, %186 > %447 = call float @llvm.fma.f32(float %442, float %445, float %446) > %448 = fmul float %447, %88 > %449 = fmul float %448, 2.000000e+00 > %450 = shl i32 %117, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %450) > %452 = shl i32 %117, 4 > %453 = or i32 %452, 4 > %454 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %453) > %455 = fsub float -0.000000e+00, %321 > %456 = call float @llvm.fma.f32(float %451, float %454, float %455) > %457 = fadd float %323, %322 > %458 = fmul float %456, %90 > %459 = fmul float %457, %90 > %460 = fmul float %458, 2.000000e+00 > %461 = fmul float %459, 2.000000e+00 > %462 = fadd float %239, %238 > %463 = fmul float %462, %89 > %464 = fmul float %463, 2.000000e+00 > %465 = shl i32 %115, 4 > %466 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %465) > %467 = shl i32 %115, 4 > %468 = or i32 %467, 4 > %469 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %468) > %470 = fsub float -0.000000e+00, %237 > %471 = call float @llvm.fma.f32(float %466, float %469, float %470) > %472 = fmul float %471, %89 > %473 = fmul float %472, 2.000000e+00 > %474 = fadd float %390, %392 > %475 = fadd float %449, %473 > %476 = fadd float %440, %464 > %477 = fadd float %431, %434 > %478 = fadd float %415, %474 > %479 = fadd float %460, %475 > %480 = fadd float %461, %476 > %481 = fadd float %437, %477 > %482 = fmul float %478, %76 > %483 = fmul float %479, %77 > %484 = fadd float %483, %482 > %485 = fmul float %480, %78 > %486 = fadd float %484, %485 > %487 = fmul float %103, %486 > %488 = fmul float %104, %369 > %489 = fadd float %488, %487 > %490 = fmul float %105, %428 > %491 = fadd float %489, %490 > %492 = fadd float %491, 1.000000e+00 > %493 = fmul float %492, %82 > %494 = fmul float %493, %24 > %495 = fmul float %494, 5.000000e-01 > %496 = fmul float %361, %70 > %497 = fmul float %362, %71 > %498 = fadd float %496, %497 > %499 = fmul float %363, %72 > %500 = fadd float %498, %499 > %501 = fadd float %500, %364 > %502 = fmul float %420, %70 > %503 = fmul float %421, %71 > %504 = fadd float %502, %503 > %505 = fmul float %422, %72 > %506 = fadd float %504, %505 > %507 = fadd float %506, %423 > %508 = fmul float %478, %70 > %509 = fmul float %479, %71 > %510 = fadd float %508, %509 > %511 = fmul float %480, %72 > %512 = fadd float %510, %511 > %513 = fadd float %512, %481 > %514 = fadd float %501, %513 > %515 = fadd float %514, %507 > %516 = fmul float %52, %52 > %517 = fmul float %53, %53 > %518 = fadd float %517, %516 > %519 = fmul float %54, %54 > %520 = fadd float %518, %519 > %521 = call float @llvm.sqrt.f32(float %520) > %522 = fmul float %521, %23 > %523 = fcmp une float %522, 0.000000e+00 > br i1 %523, label %IF, label %ELSE > >IF: ; preds = %main_body > %524 = fdiv float 1.000000e+00, %522 > %525 = fmul float %515, %524 > br label %ENDIF > >ELSE: ; preds = %main_body > %526 = fcmp ogt float %515, 0.000000e+00 > %527 = select i1 %526, float 1.000000e+00, float %515 > %528 = fcmp oge float %527, 0.000000e+00 > %.op = fmul float %527, 0x4600000000000000 > %529 = select i1 %528, float %.op, float 
0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp32.0 = phi float [ %525, %IF ], [ %529, %ELSE ] > %530 = fsub float %31, %27 > %531 = fsub float %32, %28 > %532 = fsub float %33, %29 > %533 = fsub float %34, %30 > %534 = fmul float %22, %64 > %535 = fmul float %534, 0x3FC5555560000000 > %536 = call float @llvm.AMDGPU.clamp.(float %535, float 0.000000e+00, float 1.000000e+00) > %537 = call float @llvm.fma.f32(float %536, float %530, float %27) > %538 = call float @llvm.fma.f32(float %536, float %531, float %28) > %539 = call float @llvm.fma.f32(float %536, float %532, float %29) > %540 = call float @llvm.fma.f32(float %536, float %533, float %30) > %541 = call float @llvm.fma.f32(float %35, float %538, float %temp32.0) > %542 = call float @llvm.fma.f32(float %35, float %540, float %temp32.0) > %543 = fmul float %521, %537 > %544 = fmul float %521, %539 > %545 = fsub float -0.000000e+00, %83 > %546 = call float @llvm.fma.f32(float %545, float %25, float 1.000000e+00) > %547 = fsub float -0.000000e+00, %84 > %548 = call float @llvm.fma.f32(float %547, float %26, float 1.000000e+00) > %549 = fadd float %541, %546 > %550 = fadd float %542, %548 > %551 = call float @llvm.sin.f32(float %549) > %552 = call float @llvm.sin.f32(float %550) > %553 = fmul float %543, %551 > %554 = fmul float %544, %552 > %555 = call float @llvm.fma.f32(float %534, float %21, float %553) > %556 = call float @llvm.fma.f32(float %554, float 2.500000e-01, float %555) > %557 = fmul float %103, %556 > %558 = fmul float %104, %556 > %559 = fmul float %105, %556 > %560 = call float @llvm.fma.f32(float %557, float %495, float %513) > %561 = call float @llvm.fma.f32(float %558, float %495, float %501) > %562 = call float @llvm.fma.f32(float %559, float %495, float %507) > %563 = fmul float %513, %513 > %564 = fmul float %501, %501 > %565 = fadd float %564, %563 > %566 = fmul float %507, %507 > %567 = fadd float %565, %566 > %568 = call float @llvm.sqrt.f32(float %567) > %569 = fmul float %560, %560 > %570 = fmul float %561, %561 > %571 = fadd float %570, %569 > %572 = fmul float %562, %562 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %560, %574 > %576 = fmul float %561, %574 > %577 = fmul float %562, %574 > %578 = fmul float %568, %575 > %579 = fmul float %568, %576 > %580 = fmul float %568, %577 > %581 = fmul float %44, %578 > %582 = fmul float %45, %579 > %583 = fadd float %581, %582 > %584 = fmul float %46, %580 > %585 = fadd float %583, %584 > %586 = fadd float %585, %47 > %587 = fmul float %586, %20 > %588 = fmul float %36, %578 > %589 = fmul float %37, %579 > %590 = fadd float %588, %589 > %591 = fmul float %38, %580 > %592 = fadd float %590, %591 > %593 = fadd float %592, %39 > %594 = fmul float %40, %578 > %595 = fmul float %41, %579 > %596 = fadd float %594, %595 > %597 = fmul float %42, %580 > %598 = fadd float %596, %597 > %599 = fadd float %598, %43 > %600 = fmul float %48, %578 > %601 = fmul float %49, %579 > %602 = fadd float %600, %601 > %603 = fmul float %50, %580 > %604 = fadd float %602, %603 > %605 = fadd float %604, %51 > %606 = bitcast i32 %11 to float > %607 = insertvalue <{ float, float, float }> undef, float %606, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %593, float %599, float %587, float %605) > ret <{ float, float, float }> %607 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x 
float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..39] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { -0.3765, 0.0000, 1.0000, 0.0500} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 624, 0, 0} >IMM[3] FLT32 {158456325028528675187087900672.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D > 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx > 3: FSLT TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy > 4: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 5: INEG TEMP[1].x, TEMP[1].xxxx > 6: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 7: AND TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz > 8: KILL_IF -TEMP[2].xxxx > 9: ADD TEMP[0].x, IN[0].zzzz, IMM[0].wwww > 10: FSLT TEMP[2].x, TEMP[0].xxxx, IMM[0].yyyy > 11: AND TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx > 12: INEG TEMP[2].x, TEMP[2].xxxx > 13: USNE TEMP[1].x, TEMP[2].xxxx, IMM[2].xxxx > 14: AND TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz > 15: KILL_IF -TEMP[2].xxxx > 16: FSNE TEMP[2].x, CONST[1][39].xxxx, IMM[0].yyyy > 17: UIF TEMP[2].xxxx :0 > 18: RCP TEMP[2].x, CONST[1][39].xxxx > 19: MUL TEMP[2].x, CONST[1][39].yyyy, TEMP[2].xxxx > 20: ELSE :0 > 21: SSG TEMP[3].x, CONST[1][39].yyyy > 22: MUL TEMP[2].x, IMM[3].xxxx, TEMP[3].xxxx > 23: ENDIF > 24: ADD TEMP[0].x, -TEMP[2].xxxx, IN[0].wwww > 25: FSLT TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy > 26: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 27: INEG TEMP[0].x, TEMP[0].xxxx > 28: USNE TEMP[1].x, TEMP[0].xxxx, IMM[2].xxxx > 29: AND TEMP[0].x, TEMP[1].xxxx, IMM[0].zzzz > 30: KILL_IF -TEMP[0].xxxx > 31: ADD TEMP[0], -IN[0].wwww, IMM[0].zzzz > 32: MOV OUT[0], TEMP[0] > 33: END >radeonsi: Compiling shader 176 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 624) > %26 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 628) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %40 = bitcast float %36 to i32 > %41 = bitcast float %37 to i32 > %42 = insertelement <2 x i32> undef, i32 %40, i32 0 > %43 = insertelement <2 x i32> %42, i32 %41, i32 1 > %44 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %43, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %45 = extractelement <4 x float> %44, i32 3 > %46 = fadd float %45, 0xBFD8181820000000 > %47 = fcmp olt float %46, 0.000000e+00 > %48 = select i1 %47, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %48) > %49 = fadd float %38, 0x3FA99999A0000000 > %50 = fcmp olt float %49, 0.000000e+00 > %51 = select i1 %50, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %51) > %52 = fcmp une float %25, 0.000000e+00 > br i1 %52, label %IF, label %ELSE > >IF: ; preds = %main_body > %53 = fdiv float 1.000000e+00, %25 > %54 = fmul float %26, %53 > br label %ENDIF > >ELSE: ; preds = %main_body > %55 = fcmp ogt float %26, 0.000000e+00 > %56 = select i1 %55, float 1.000000e+00, float %26 > %57 = fcmp oge float %56, 0.000000e+00 > %.op = fmul float %56, 0x4600000000000000 > %58 = select i1 %57, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp8.0 = phi float [ %54, %IF ], [ %58, %ELSE ] > %59 = fsub float %39, %temp8.0 > %60 = fcmp olt float %59, 0.000000e+00 > %61 = select i1 %60, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %61) > %62 = fsub float 1.000000e+00, %39 > %63 = fsub float 1.000000e+00, %39 > %64 = fsub float 1.000000e+00, %39 > %65 = fsub float 1.000000e+00, %39 > %66 = bitcast float %5 to i32 > %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %66, 10 > %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %62, 11 > %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %63, 12 > %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %64, 13 > %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %65, 
14 > %72 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %72 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..3] >DCL CONST[2][0..4095] >DCL CONST[3][0..25] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {48, 2, 400, 0} > 0: MUL TEMP[0].xyz, IN[3].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[2].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[2].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: 
UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[2].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[2].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[2].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL 
ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[2].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[2].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[2].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[2].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[2].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[2].zzzz >205: MUL TEMP[4].x, 
IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >211: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[2].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[17].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[17].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[2].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[2].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[2].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[2].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[2].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[2].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[2].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, 
IN[2].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[2].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[2].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[2].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][0], TEMP[3] >356: DP4 TEMP[2].x, CONST[1][1], TEMP[3] >357: MOV TEMP[1].y, TEMP[2].xxxx >358: DP4 TEMP[0].x, CONST[1][2], TEMP[3] >359: DP4 TEMP[2].x, CONST[1][3], TEMP[3] >360: MOV TEMP[0].y, TEMP[2].xxxx >361: MIN TEMP[3].x, TEMP[0].xxxx, CONST[3][25].zzzz >362: MOV TEMP[1].z, TEMP[3].xxxx >363: MOV TEMP[1].w, TEMP[2].xxxx >364: MOV TEMP[0].zw, TEMP[0].yyxy >365: MOV TEMP[0].xy, IN[1].xyxx >366: MOV OUT[1], TEMP[0] >367: MOV OUT[0], TEMP[1] >368: END >radeonsi: Compiling shader 177 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = 
"tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 408) > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %13) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %14) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %15) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 > %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %16) > %60 = extractelement <4 x float> %59, i32 0 > %61 = extractelement <4 x float> %59, i32 1 > %62 = extractelement <4 x float> %59, i32 2 > %63 = fmul float %62, 0x406FE01000000000 > %64 = fmul float %61, 0x406FE01000000000 > %65 = fmul float %60, 0x406FE01000000000 > %66 = fptosi float %63 to 
i32 > %67 = fptosi float %64 to i32 > %68 = fptosi float %65 to i32 > %69 = shl i32 %66, 1 > %70 = or i32 %69, 1 > %71 = shl i32 %67, 1 > %72 = or i32 %71, 1 > %73 = shl i32 %68, 1 > %74 = or i32 %73, 1 > %75 = shl i32 %66, 5 > %76 = or i32 %75, 4 > %77 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %76) > %78 = fmul float %54, %77 > %79 = shl i32 %67, 5 > %80 = or i32 %79, 4 > %81 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %80) > %82 = fmul float %55, %81 > %83 = shl i32 %70, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %83) > %85 = shl i32 %70, 4 > %86 = or i32 %85, 12 > %87 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %86) > %88 = fmul float %84, %87 > %89 = shl i32 %70, 4 > %90 = or i32 %89, 4 > %91 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %90) > %92 = shl i32 %70, 4 > %93 = or i32 %92, 8 > %94 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %93) > %95 = fsub float -0.000000e+00, %88 > %96 = call float @llvm.fma.f32(float %91, float %94, float %95) > %97 = shl i32 %70, 4 > %98 = or i32 %97, 4 > %99 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %98) > %100 = shl i32 %70, 4 > %101 = or i32 %100, 8 > %102 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %101) > %103 = call float @llvm.fma.f32(float %99, float %102, float %88) > %104 = fmul float %103, %54 > %105 = fmul float %96, %54 > %106 = fmul float %105, 2.000000e+00 > %107 = shl i32 %72, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %107) > %109 = shl i32 %72, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %72, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %114) > %116 = shl i32 %72, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %72, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %122) > %124 = shl i32 %72, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %55 > %129 = fmul float %128, 2.000000e+00 > %130 = fmul float %120, %55 > %131 = fmul float %130, 2.000000e+00 > %132 = shl i32 %70, 4 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %133) > %135 = shl i32 %70, 4 > %136 = or i32 %135, 8 > %137 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %136) > %138 = shl i32 %70, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %138) > %140 = shl i32 %70, 4 > %141 = or i32 %140, 12 > %142 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %141) > %143 = fmul float %137, %142 > %144 = fmul float %137, %139 > %145 = fmul float %134, %142 > %146 = shl i32 %70, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %146) > %148 = shl i32 %70, 4 > %149 = or i32 %148, 4 > %150 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %143) > %152 = fmul float %151, %54 > %153 = fmul float %152, 2.000000e+00 > %154 = shl i32 %70, 4 > %155 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %154) > %156 = shl i32 %70, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %157) > %159 = shl i32 %70, 4 > %160 = or i32 %159, 8 > %161 = call float 
@llvm.SI.load.const(<16 x i8> %36, i32 %160) > %162 = shl i32 %70, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %162) > %164 = shl i32 %70, 4 > %165 = or i32 %164, 4 > %166 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %165) > %167 = shl i32 %70, 4 > %168 = or i32 %167, 8 > %169 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %168) > %170 = fmul float %155, %163 > %171 = fmul float %158, %166 > %172 = fmul float %161, %169 > %173 = fadd float %172, %171 > %174 = fadd float %172, %170 > %175 = fadd float %171, %170 > %176 = fsub float -0.000000e+00, %173 > %177 = call float @llvm.fma.f32(float %176, float 2.000000e+00, float 1.000000e+00) > %178 = fsub float -0.000000e+00, %174 > %179 = call float @llvm.fma.f32(float %178, float 2.000000e+00, float 1.000000e+00) > %180 = fsub float -0.000000e+00, %175 > %181 = call float @llvm.fma.f32(float %180, float 2.000000e+00, float 1.000000e+00) > %182 = fmul float %54, %179 > %183 = shl i32 %72, 4 > %184 = or i32 %183, 4 > %185 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %184) > %186 = shl i32 %72, 4 > %187 = or i32 %186, 8 > %188 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %187) > %189 = shl i32 %72, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %189) > %191 = shl i32 %72, 4 > %192 = or i32 %191, 12 > %193 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %192) > %194 = fmul float %188, %193 > %195 = fmul float %188, %190 > %196 = fmul float %185, %193 > %197 = shl i32 %72, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %197) > %199 = shl i32 %72, 4 > %200 = or i32 %199, 4 > %201 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %200) > %202 = call float @llvm.fma.f32(float %198, float %201, float %194) > %203 = fmul float %202, %55 > %204 = fmul float %203, 2.000000e+00 > %205 = shl i32 %72, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %205) > %207 = shl i32 %72, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %208) > %210 = shl i32 %72, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %211) > %213 = shl i32 %72, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %213) > %215 = shl i32 %72, 4 > %216 = or i32 %215, 4 > %217 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %216) > %218 = shl i32 %72, 4 > %219 = or i32 %218, 8 > %220 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %219) > %221 = fmul float %206, %214 > %222 = fmul float %209, %217 > %223 = fmul float %212, %220 > %224 = fadd float %223, %222 > %225 = fadd float %223, %221 > %226 = fadd float %222, %221 > %227 = fsub float -0.000000e+00, %224 > %228 = call float @llvm.fma.f32(float %227, float 2.000000e+00, float 1.000000e+00) > %229 = fsub float -0.000000e+00, %225 > %230 = call float @llvm.fma.f32(float %229, float 2.000000e+00, float 1.000000e+00) > %231 = fsub float -0.000000e+00, %226 > %232 = call float @llvm.fma.f32(float %231, float 2.000000e+00, float 1.000000e+00) > %233 = fmul float %55, %230 > %234 = fadd float %153, %204 > %235 = fadd float %182, %233 > %236 = fadd float %106, %131 > %237 = fadd float %78, %82 > %238 = shl i32 %68, 5 > %239 = or i32 %238, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %239) > %241 = fmul float %56, %240 > %242 = shl i32 %74, 4 > %243 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %242) > %244 = shl i32 %74, 4 > %245 = or i32 %244, 12 > %246 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %245) > %247 = fmul float %243, %246 > %248 = shl 
i32 %74, 4 > %249 = or i32 %248, 4 > %250 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %249) > %251 = shl i32 %74, 4 > %252 = or i32 %251, 8 > %253 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %252) > %254 = fsub float -0.000000e+00, %247 > %255 = call float @llvm.fma.f32(float %250, float %253, float %254) > %256 = shl i32 %74, 4 > %257 = or i32 %256, 4 > %258 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %257) > %259 = shl i32 %74, 4 > %260 = or i32 %259, 8 > %261 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %260) > %262 = call float @llvm.fma.f32(float %258, float %261, float %247) > %263 = fmul float %262, %56 > %264 = fmul float %263, 2.000000e+00 > %265 = fmul float %255, %56 > %266 = fmul float %265, 2.000000e+00 > %267 = shl i32 %74, 4 > %268 = or i32 %267, 4 > %269 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %268) > %270 = shl i32 %74, 4 > %271 = or i32 %270, 8 > %272 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %271) > %273 = shl i32 %74, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %273) > %275 = shl i32 %74, 4 > %276 = or i32 %275, 12 > %277 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %276) > %278 = fmul float %272, %277 > %279 = fmul float %272, %274 > %280 = fmul float %269, %277 > %281 = shl i32 %74, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %281) > %283 = shl i32 %74, 4 > %284 = or i32 %283, 4 > %285 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %278) > %287 = fmul float %286, %56 > %288 = fmul float %287, 2.000000e+00 > %289 = shl i32 %74, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %289) > %291 = shl i32 %74, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %292) > %294 = shl i32 %74, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %295) > %297 = shl i32 %74, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %297) > %299 = shl i32 %74, 4 > %300 = or i32 %299, 4 > %301 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %300) > %302 = shl i32 %74, 4 > %303 = or i32 %302, 8 > %304 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %303) > %305 = fmul float %290, %298 > %306 = fmul float %293, %301 > %307 = fmul float %296, %304 > %308 = fadd float %307, %306 > %309 = fadd float %307, %305 > %310 = fadd float %306, %305 > %311 = fsub float -0.000000e+00, %308 > %312 = call float @llvm.fma.f32(float %311, float 2.000000e+00, float 1.000000e+00) > %313 = fsub float -0.000000e+00, %309 > %314 = call float @llvm.fma.f32(float %313, float 2.000000e+00, float 1.000000e+00) > %315 = fsub float -0.000000e+00, %310 > %316 = call float @llvm.fma.f32(float %315, float 2.000000e+00, float 1.000000e+00) > %317 = fmul float %56, %314 > %318 = fadd float %234, %288 > %319 = fadd float %235, %317 > %320 = fadd float %236, %266 > %321 = fadd float %237, %241 > %322 = fmul float %318, %43 > %323 = fmul float %319, %44 > %324 = fadd float %322, %323 > %325 = fmul float %320, %45 > %326 = fadd float %324, %325 > %327 = fadd float %326, %321 > %328 = shl i32 %70, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %328) > %330 = shl i32 %70, 4 > %331 = or i32 %330, 8 > %332 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %331) > %333 = fsub float -0.000000e+00, %145 > %334 = call float @llvm.fma.f32(float %329, float %332, float %333) > %335 = fmul float %334, %54 > %336 = fmul float %335, 2.000000e+00 > %337 = fmul float 
%104, 2.000000e+00 > %338 = shl i32 %72, 4 > %339 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %338) > %340 = shl i32 %72, 4 > %341 = or i32 %340, 8 > %342 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %341) > %343 = fsub float -0.000000e+00, %196 > %344 = call float @llvm.fma.f32(float %339, float %342, float %343) > %345 = fmul float %344, %55 > %346 = fmul float %345, 2.000000e+00 > %347 = fmul float %54, %181 > %348 = fmul float %54, %177 > %349 = fmul float %55, %232 > %350 = fmul float %55, %228 > %351 = shl i32 %66, 5 > %352 = or i32 %351, 8 > %353 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %352) > %354 = fmul float %54, %353 > %355 = shl i32 %67, 5 > %356 = or i32 %355, 8 > %357 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %356) > %358 = fmul float %55, %357 > %359 = fadd float %346, %336 > %360 = fadd float %129, %337 > %361 = fadd float %349, %347 > %362 = fadd float %358, %354 > %363 = shl i32 %74, 4 > %364 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %363) > %365 = shl i32 %74, 4 > %366 = or i32 %365, 8 > %367 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %366) > %368 = fsub float -0.000000e+00, %280 > %369 = call float @llvm.fma.f32(float %364, float %367, float %368) > %370 = fmul float %369, %56 > %371 = fmul float %370, 2.000000e+00 > %372 = fmul float %56, %316 > %373 = fmul float %56, %312 > %374 = shl i32 %68, 5 > %375 = or i32 %374, 8 > %376 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %375) > %377 = fmul float %56, %376 > %378 = fadd float %359, %371 > %379 = fadd float %360, %264 > %380 = fadd float %361, %372 > %381 = fadd float %362, %377 > %382 = fmul float %378, %43 > %383 = fmul float %379, %44 > %384 = fadd float %382, %383 > %385 = fmul float %380, %45 > %386 = fadd float %384, %385 > %387 = fadd float %386, %381 > %388 = shl i32 %66, 5 > %389 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %388) > %390 = fmul float %54, %389 > %391 = shl i32 %67, 5 > %392 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %391) > %393 = fmul float %55, %392 > %394 = shl i32 %68, 5 > %395 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %394) > %396 = fmul float %56, %395 > %397 = shl i32 %70, 4 > %398 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %397) > %399 = shl i32 %70, 4 > %400 = or i32 %399, 4 > %401 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %400) > %402 = fsub float -0.000000e+00, %143 > %403 = call float @llvm.fma.f32(float %398, float %401, float %402) > %404 = fadd float %145, %144 > %405 = fmul float %403, %54 > %406 = fmul float %404, %54 > %407 = fmul float %405, 2.000000e+00 > %408 = fmul float %406, 2.000000e+00 > %409 = shl i32 %72, 4 > %410 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %409) > %411 = shl i32 %72, 4 > %412 = or i32 %411, 4 > %413 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %412) > %414 = fsub float -0.000000e+00, %194 > %415 = call float @llvm.fma.f32(float %410, float %413, float %414) > %416 = shl i32 %74, 4 > %417 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %416) > %418 = shl i32 %74, 4 > %419 = or i32 %418, 4 > %420 = call float @llvm.SI.load.const(<16 x i8> %36, i32 %419) > %421 = fsub float -0.000000e+00, %278 > %422 = call float @llvm.fma.f32(float %417, float %420, float %421) > %423 = fadd float %280, %279 > %424 = fmul float %415, %55 > %425 = fmul float %422, %56 > %426 = fmul float %423, %56 > %427 = fmul float %425, 2.000000e+00 > %428 = fmul float %426, 2.000000e+00 > %429 = fadd float %196, %195 > %430 = fmul float %429, %55 > %431 = 
fmul float %424, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = fadd float %348, %350 > %434 = fadd float %407, %431 > %435 = fadd float %408, %432 > %436 = fadd float %390, %393 > %437 = fadd float %373, %433 > %438 = fadd float %427, %434 > %439 = fadd float %428, %435 > %440 = fadd float %396, %436 > %441 = fmul float %437, %43 > %442 = fmul float %438, %44 > %443 = fadd float %441, %442 > %444 = fmul float %439, %45 > %445 = fadd float %443, %444 > %446 = fadd float %445, %440 > %447 = fmul float %19, %446 > %448 = fmul float %20, %327 > %449 = fadd float %447, %448 > %450 = fmul float %21, %387 > %451 = fadd float %449, %450 > %452 = fadd float %451, %22 > %453 = fmul float %23, %446 > %454 = fmul float %24, %327 > %455 = fadd float %453, %454 > %456 = fmul float %25, %387 > %457 = fadd float %455, %456 > %458 = fadd float %457, %26 > %459 = fmul float %27, %446 > %460 = fmul float %28, %327 > %461 = fadd float %459, %460 > %462 = fmul float %29, %387 > %463 = fadd float %461, %462 > %464 = fadd float %463, %30 > %465 = fmul float %31, %446 > %466 = fmul float %32, %327 > %467 = fadd float %465, %466 > %468 = fmul float %33, %387 > %469 = fadd float %467, %468 > %470 = fadd float %469, %34 > %471 = call float @llvm.minnum.f32(float %464, float %39) > %472 = bitcast i32 %11 to float > %473 = insertvalue <{ float, float, float }> undef, float %472, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %464, float %470) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %452, float %458, float %471, float %470) > ret <{ float, float, float }> %473 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { -0.3765, 0.0000, 1.0000, 158456325028528675187087900672.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D > 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx > 3: FSLT TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy > 4: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx > 5: INEG TEMP[0].x, TEMP[0].xxxx > 6: USNE TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx > 7: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz > 8: KILL_IF -TEMP[0].xxxx > 9: FSEQ TEMP[0], IN[0].wwww, IMM[0].yyyy > 10: SSG TEMP[1], IN[0].zzzz > 11: MUL TEMP[1], IMM[0].wwww, TEMP[1] > 12: RCP TEMP[2], IN[0].wwww > 13: MUL TEMP[2], IN[0].zzzz, TEMP[2] > 14: UCMP TEMP[0], TEMP[0], TEMP[1], TEMP[2] > 15: MOV OUT[0], TEMP[0] > 16: END >radeonsi: Compiling shader 178 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %36 = bitcast float %32 to i32 > %37 = bitcast float %33 to i32 > %38 = insertelement <2 x i32> undef, i32 %36, i32 0 > %39 = insertelement <2 x i32> %38, i32 %37, i32 1 > %40 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %39, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %41 = extractelement <4 x float> %40, i32 3 > %42 = fadd float %41, 0xBFD8181820000000 > %43 = fcmp olt float %42, 0.000000e+00 > %44 = select i1 %43, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %44) > %45 = fcmp oeq float %35, 0.000000e+00 > %46 = fcmp oeq float %35, 0.000000e+00 > %47 = fcmp oeq float %35, 0.000000e+00 > %48 = fcmp oeq float %35, 0.000000e+00 > %49 = fcmp ogt float %34, 0.000000e+00 > %50 = select i1 %49, float 1.000000e+00, float %34 > %51 = fcmp oge float %50, 0.000000e+00 > %52 = fcmp ogt float %34, 0.000000e+00 > %53 = select i1 %52, float 1.000000e+00, float %34 > %54 = fcmp oge float %53, 0.000000e+00 > %55 = fcmp ogt float %34, 0.000000e+00 > %56 = select i1 %55, float 1.000000e+00, float %34 > %57 = fcmp oge float %56, 0.000000e+00 > %58 = fcmp ogt float %34, 0.000000e+00 > %59 = select i1 %58, float 1.000000e+00, float %34 > %60 = fcmp oge float %59, 0.000000e+00 > %.op = fmul float %50, 0x4600000000000000 > %61 = select i1 %51, float %.op, float 0xC600000000000000 > %.op12 = fmul float %53, 0x4600000000000000 > %62 = select i1 %54, float %.op12, float 0xC600000000000000 > %.op13 = fmul float %56, 0x4600000000000000 > %63 = select i1 %57, float %.op13, float 0xC600000000000000 > %.op14 = fmul float %59, 0x4600000000000000 > %64 = select i1 %60, float %.op14, float 0xC600000000000000 > %65 = fdiv float 1.000000e+00, %35 > %66 = fmul float %34, %65 > %67 = fmul float %34, %65 > %68 = fmul float %34, %65 > %69 = fmul float %34, %65 > %70 = select i1 %45, float %61, float %66 > %71 = select i1 %46, float %62, float %67 > %72 = select i1 %47, float %63, float %68 > %73 = select i1 %48, float %64, float %69 > %74 = bitcast float %5 to i32 > %75 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, 
float, float, float, float, float, float }> undef, i32 %74, 10 > %76 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %75, float %70, 11 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %76, float %71, 12 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %72, 13 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %73, 14 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 352, 0, 0} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: MOV TEMP[1].xy, IN[0].xyyy > 6: TEX TEMP[2], TEMP[1], SAMP[0], 2D > 7: ADD TEMP[3].x, TEMP[2].wwww, IMM[2].xxxx > 8: MOV TEMP[2].xyz, TEMP[2].xyzx > 9: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 10: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 11: INEG TEMP[3].x, TEMP[3].xxxx > 12: USNE TEMP[1].x, TEMP[3].xxxx, IMM[0].xxxx > 13: AND TEMP[3].x, TEMP[1].xxxx, IMM[2].zzzz > 14: KILL_IF -TEMP[3].xxxx > 15: MOV TEMP[3].xy, IN[0].xyyy > 16: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 17: FMA TEMP[3].xy, TEMP[3].ywww, IMM[2].wwww, IMM[3].xxxx > 18: MOV TEMP[1].xy, TEMP[3].xyxx > 19: FMA TEMP[4].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[2].zzzz > 20: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[4].xxxx > 21: SQRT TEMP[3].x, TEMP[3].xxxx > 22: MOV TEMP[1].z, TEMP[3].xxxx > 23: DP3 TEMP[3].x, IN[1].xyzz, TEMP[1].xyzz > 24: DP3 TEMP[4].x, IN[2].xyzz, TEMP[1].xyzz > 25: MOV TEMP[3].y, TEMP[4].xxxx > 26: DP3 TEMP[4].x, IN[3].xyzz, TEMP[1].xyzz > 27: 
MOV TEMP[3].z, TEMP[4].xxxx > 28: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[3].xyzz > 29: RSQ TEMP[4].x, TEMP[1].xxxx > 30: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 31: MOV TEMP[3].xyz, -TEMP[1].xyzx > 32: USNE TEMP[4].x, TEMP[0].xxxx, IMM[0].xxxx > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].x, TEMP[1].xxxx > 35: ELSE :0 > 36: MOV TEMP[4].x, TEMP[3].xxxx > 37: ENDIF > 38: MOV TEMP[4].x, TEMP[4].xxxx > 39: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 40: UIF TEMP[5].xxxx :0 > 41: MOV TEMP[5].x, TEMP[1].yyyy > 42: ELSE :0 > 43: MOV TEMP[5].x, TEMP[3].yyyy > 44: ENDIF > 45: MOV TEMP[4].y, TEMP[5].xxxx > 46: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 47: UIF TEMP[0].xxxx :0 > 48: MOV TEMP[1].x, TEMP[1].zzzz > 49: ELSE :0 > 50: MOV TEMP[1].x, TEMP[3].zzzz > 51: ENDIF > 52: MOV TEMP[4].z, TEMP[1].xxxx > 53: FMA TEMP[1].xyz, TEMP[4].xyzz, IMM[3].yyyy, IMM[3].yyyy > 54: MOV TEMP[1].w, CONST[1][22].zzzz > 55: MOV TEMP[0].xy, IN[0].xyyy > 56: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 57: MOV TEMP[2].w, TEMP[0].wwww > 58: MUL TEMP[3].x, TEMP[0].zzzz, CONST[1][22].yyyy > 59: MOV TEMP[3].yz, TEMP[0].xyxx > 60: MOV TEMP[3].w, CONST[1][22].xxxx > 61: MOV OUT[0], TEMP[1] > 62: MOV OUT[1], TEMP[2] > 63: MOV OUT[2], TEMP[3] > 64: END >radeonsi: Compiling shader 179 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 
= bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 11 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %66 = icmp ne i32 %19, 0 > %. = select i1 %66, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %67 = bitcast float %55 to i32 > %68 = bitcast float %56 to i32 > %69 = insertelement <2 x i32> undef, i32 %67, i32 0 > %70 = insertelement <2 x i32> %69, i32 %68, i32 1 > %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %72 = extractelement <4 x float> %71, i32 0 > %73 = extractelement <4 x float> %71, i32 1 > %74 = extractelement <4 x float> %71, i32 2 > %75 = extractelement <4 x float> %71, i32 3 > %76 = fadd float %75, 0xBFD8181820000000 > %77 = fcmp olt float %76, 0.000000e+00 > %78 = select i1 %77, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %78) > %79 = bitcast float %55 to i32 > %80 = bitcast float %56 to i32 > %81 = insertelement <2 x i32> undef, i32 %79, i32 0 > %82 = insertelement <2 x i32> %81, i32 %80, i32 1 > %83 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %82, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %84 = extractelement <4 x float> %83, i32 1 > %85 = extractelement <4 x float> %83, i32 3 > %86 = call float @llvm.fma.f32(float %84, float 2.000000e+00, float -1.000000e+00) > %87 = call float @llvm.fma.f32(float %85, float 2.000000e+00, float -1.000000e+00) > %88 = fsub float -0.000000e+00, %86 > %89 = call float @llvm.fma.f32(float %88, float %86, float 1.000000e+00) > %90 = fsub float -0.000000e+00, %87 > %91 = call float @llvm.fma.f32(float %90, float %87, float %89) > %92 = call float @llvm.sqrt.f32(float %91) > %93 = fmul float %57, %86 > %94 = fmul float %58, %87 > %95 = fadd float %94, %93 > %96 = fmul float %59, %92 > %97 = fadd float %95, %96 > %98 = fmul float %60, %86 > %99 = fmul float %61, %87 > %100 = fadd float %99, %98 > %101 = fmul float %62, %92 > %102 = fadd float %100, %101 > %103 = fmul float %63, %86 > %104 = fmul float %64, %87 > %105 = fadd float %104, %103 > %106 = fmul float %65, %92 > %107 = fadd float %105, %106 > %108 = fmul float %97, %97 > %109 = fmul float %102, %102 > %110 = fadd float %109, %108 > %111 = fmul float %107, %107 > %112 = fadd float %110, %111 > %113 = call float @llvm.AMDGPU.rsq.clamped.f32(float %112) > %114 = fmul float %113, %97 > %115 = fmul 
float %113, %102 > %116 = fmul float %113, %107 > %117 = fsub float -0.000000e+00, %114 > %118 = fsub float -0.000000e+00, %115 > %119 = fsub float -0.000000e+00, %116 > %120 = bitcast float %. to i32 > %121 = icmp ne i32 %120, 0 > %temp16.0 = select i1 %121, float %114, float %117 > %122 = bitcast float %. to i32 > %123 = icmp ne i32 %122, 0 > %.33 = select i1 %123, float %115, float %118 > %124 = bitcast float %. to i32 > %125 = icmp ne i32 %124, 0 > %temp4.0 = select i1 %125, float %116, float %119 > %126 = call float @llvm.fma.f32(float %temp16.0, float 5.000000e-01, float 5.000000e-01) > %127 = call float @llvm.fma.f32(float %.33, float 5.000000e-01, float 5.000000e-01) > %128 = call float @llvm.fma.f32(float %temp4.0, float 5.000000e-01, float 5.000000e-01) > %129 = bitcast float %55 to i32 > %130 = bitcast float %56 to i32 > %131 = insertelement <2 x i32> undef, i32 %129, i32 0 > %132 = insertelement <2 x i32> %131, i32 %130, i32 1 > %133 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %132, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %134 = extractelement <4 x float> %133, i32 0 > %135 = extractelement <4 x float> %133, i32 1 > %136 = extractelement <4 x float> %133, i32 2 > %137 = extractelement <4 x float> %133, i32 3 > %138 = fmul float %136, %26 > %139 = bitcast float %5 to i32 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %139, 10 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %126, 11 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %127, 12 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %128, 13 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %27, 14 > %145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %72, 15 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %73, 16 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %74, 17 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %137, 18 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %138, 19 > %150 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149, float 
%135, 20 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %150, float %134, 21 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151, float %25, 22 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {48, 112, 128, 144} >IMM[4] UINT32 {160, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, 
TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, 
TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, 
IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx 
>266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy 
>347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][0], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][1], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][2], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: DP4 TEMP[5].x, CONST[1][3], TEMP[18] >361: MOV TEMP[3].w, TEMP[5].xxxx >362: MOV TEMP[5].xy, IN[2].xyxx >363: DP3 TEMP[6].x, TEMP[4].xyzz, IN[3].xyzz >364: MOV TEMP[1].y, TEMP[6].xxxx >365: DP3 TEMP[6].x, TEMP[7].xyzz, IN[3].xyzz >366: MOV TEMP[1].z, TEMP[6].xxxx >367: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >368: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz >369: RSQ TEMP[6].x, TEMP[6].xxxx >370: MUL TEMP[1].xyz, TEMP[6].xxxx, TEMP[1].xyzz >371: DP3 TEMP[6].x, CONST[1][7].xyzz, TEMP[1].xyzz >372: DP3 TEMP[8].x, TEMP[4].xyzz, IN[4].xyzz >373: MOV TEMP[2].y, TEMP[8].xxxx >374: DP3 TEMP[8].x, TEMP[4].xyzz, IN[1].xyzz >375: MOV TEMP[4].y, TEMP[8].xxxx >376: DP3 TEMP[8].x, TEMP[7].xyzz, IN[4].xyzz >377: MOV TEMP[2].z, TEMP[8].xxxx >378: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >379: MOV TEMP[4].z, TEMP[7].xxxx >380: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >381: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >382: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >383: RSQ TEMP[7].x, TEMP[0].xxxx >384: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >385: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >386: MOV TEMP[6].y, TEMP[7].xxxx >387: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >388: RSQ TEMP[7].x, TEMP[7].xxxx >389: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >390: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >391: MOV TEMP[6].z, TEMP[4].xxxx >392: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >393: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >394: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >395: MOV TEMP[4].y, TEMP[7].xxxx >396: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[0].xyzz >397: MOV TEMP[1].y, TEMP[7].xxxx >398: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[2].xyzz >399: MOV TEMP[4].z, TEMP[7].xxxx >400: DP3 TEMP[2].x, CONST[1][9].xyzz, TEMP[2].xyzz >401: MOV TEMP[1].z, TEMP[2].xxxx >402: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz >403: MOV TEMP[0].w, IN[5].wwww >404: MUL TEMP[0], TEMP[0], CONST[1][10] >405: MOV OUT[5], TEMP[0] >406: MOV OUT[4], TEMP[1] >407: MOV OUT[3], TEMP[4] >408: MOV OUT[2], TEMP[6] >409: MOV OUT[1], TEMP[5] >410: MOV OUT[0], TEMP[3] >411: END >radeonsi: Compiling shader 180 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 
x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %13) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 > %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %14) > %63 = extractelement <4 x float> %62, i32 0 > %64 = extractelement <4 x float> %62, i32 1 > %65 = extractelement <4 x float> %62, i32 2 > %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 > %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %15) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %17) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x 
i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %18) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %19) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 > %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %20) > %99 = extractelement <4 x float> %98, i32 0 > %100 = extractelement <4 x float> %98, i32 1 > %101 = extractelement <4 x float> %98, i32 2 > %102 = fmul float %101, 0x406FE01000000000 > %103 = fmul float %100, 0x406FE01000000000 > %104 = fmul float %99, 0x406FE01000000000 > %105 = fptosi float %102 to i32 > %106 = fptosi float %103 to i32 > %107 = fptosi float %104 to i32 > %108 = shl i32 %105, 1 > %109 = or i32 %108, 1 > %110 = shl i32 %106, 1 > %111 = or i32 %110, 1 > %112 = shl i32 %107, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %105, 5 > %115 = or i32 %114, 4 > %116 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %115) > %117 = fmul float %93, %116 > %118 = shl i32 %106, 5 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %119) > %121 = fmul float %94, %120 > %122 = shl i32 %109, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %122) > %124 = shl i32 %109, 4 > %125 = or i32 %124, 12 > %126 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %125) > %127 = fmul float %123, %126 > %128 = shl i32 %109, 4 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %129) > %131 = shl i32 %109, 4 > %132 = or i32 %131, 8 > %133 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %132) > %134 = fsub float -0.000000e+00, %127 > %135 = call float @llvm.fma.f32(float %130, float %133, float %134) > %136 = shl i32 %109, 4 > %137 = or i32 %136, 4 > %138 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %137) > %139 = shl i32 %109, 4 > %140 = or i32 %139, 8 > %141 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %140) > %142 = call float @llvm.fma.f32(float %138, float %141, float %127) > %143 = fmul float %142, %93 > %144 = fmul float %135, %93 > %145 = fmul float %144, 2.000000e+00 > %146 = shl i32 %111, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %146) > %148 = shl i32 %111, 4 > %149 = or i32 %148, 12 > %150 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %149) > %151 = fmul float %147, %150 > %152 = shl i32 %111, 4 > %153 = or i32 %152, 4 > %154 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %153) > %155 = shl i32 %111, 4 > %156 = or i32 %155, 8 > %157 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %156) > %158 = fsub float -0.000000e+00, %151 > %159 = call float @llvm.fma.f32(float %154, float %157, float %158) > %160 = shl i32 %111, 4 > %161 = or i32 %160, 4 > %162 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %161) > %163 = shl i32 %111, 4 > %164 = or i32 %163, 8 > %165 = call float 
@llvm.SI.load.const(<16 x i8> %53, i32 %164) > %166 = call float @llvm.fma.f32(float %162, float %165, float %151) > %167 = fmul float %166, %94 > %168 = fmul float %167, 2.000000e+00 > %169 = fmul float %159, %94 > %170 = fmul float %169, 2.000000e+00 > %171 = shl i32 %109, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %172) > %174 = shl i32 %109, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %175) > %177 = shl i32 %109, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %177) > %179 = shl i32 %109, 4 > %180 = or i32 %179, 12 > %181 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %180) > %182 = fmul float %176, %181 > %183 = fmul float %176, %178 > %184 = fmul float %173, %181 > %185 = shl i32 %109, 4 > %186 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %185) > %187 = shl i32 %109, 4 > %188 = or i32 %187, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %188) > %190 = call float @llvm.fma.f32(float %186, float %189, float %182) > %191 = fmul float %190, %93 > %192 = fmul float %191, 2.000000e+00 > %193 = shl i32 %109, 4 > %194 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %193) > %195 = shl i32 %109, 4 > %196 = or i32 %195, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %196) > %198 = shl i32 %109, 4 > %199 = or i32 %198, 8 > %200 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %199) > %201 = shl i32 %109, 4 > %202 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %201) > %203 = shl i32 %109, 4 > %204 = or i32 %203, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %204) > %206 = shl i32 %109, 4 > %207 = or i32 %206, 8 > %208 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %207) > %209 = fmul float %194, %202 > %210 = fmul float %197, %205 > %211 = fmul float %200, %208 > %212 = fadd float %211, %210 > %213 = fadd float %211, %209 > %214 = fadd float %210, %209 > %215 = fsub float -0.000000e+00, %212 > %216 = call float @llvm.fma.f32(float %215, float 2.000000e+00, float 1.000000e+00) > %217 = fsub float -0.000000e+00, %213 > %218 = call float @llvm.fma.f32(float %217, float 2.000000e+00, float 1.000000e+00) > %219 = fsub float -0.000000e+00, %214 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fmul float %93, %218 > %222 = shl i32 %111, 4 > %223 = or i32 %222, 4 > %224 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %223) > %225 = shl i32 %111, 4 > %226 = or i32 %225, 8 > %227 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %226) > %228 = shl i32 %111, 4 > %229 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %228) > %230 = shl i32 %111, 4 > %231 = or i32 %230, 12 > %232 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %231) > %233 = fmul float %227, %232 > %234 = fmul float %227, %229 > %235 = fmul float %224, %232 > %236 = shl i32 %111, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %236) > %238 = shl i32 %111, 4 > %239 = or i32 %238, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %239) > %241 = call float @llvm.fma.f32(float %237, float %240, float %233) > %242 = fmul float %241, %94 > %243 = fmul float %242, 2.000000e+00 > %244 = shl i32 %111, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %244) > %246 = shl i32 %111, 4 > %247 = or i32 %246, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %247) > %249 = shl i32 %111, 4 > %250 = or i32 %249, 8 > %251 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %250) > %252 = shl 
i32 %111, 4 > %253 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %252) > %254 = shl i32 %111, 4 > %255 = or i32 %254, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %255) > %257 = shl i32 %111, 4 > %258 = or i32 %257, 8 > %259 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %258) > %260 = fmul float %245, %253 > %261 = fmul float %248, %256 > %262 = fmul float %251, %259 > %263 = fadd float %262, %261 > %264 = fadd float %262, %260 > %265 = fadd float %261, %260 > %266 = fsub float -0.000000e+00, %263 > %267 = call float @llvm.fma.f32(float %266, float 2.000000e+00, float 1.000000e+00) > %268 = fsub float -0.000000e+00, %264 > %269 = call float @llvm.fma.f32(float %268, float 2.000000e+00, float 1.000000e+00) > %270 = fsub float -0.000000e+00, %265 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fmul float %94, %269 > %273 = fadd float %192, %243 > %274 = fadd float %221, %272 > %275 = fadd float %145, %170 > %276 = fadd float %117, %121 > %277 = shl i32 %107, 5 > %278 = or i32 %277, 4 > %279 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %278) > %280 = fmul float %95, %279 > %281 = shl i32 %113, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %281) > %283 = shl i32 %113, 4 > %284 = or i32 %283, 12 > %285 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %284) > %286 = fmul float %282, %285 > %287 = shl i32 %113, 4 > %288 = or i32 %287, 4 > %289 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %288) > %290 = shl i32 %113, 4 > %291 = or i32 %290, 8 > %292 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %291) > %293 = fsub float -0.000000e+00, %286 > %294 = call float @llvm.fma.f32(float %289, float %292, float %293) > %295 = shl i32 %113, 4 > %296 = or i32 %295, 4 > %297 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %296) > %298 = shl i32 %113, 4 > %299 = or i32 %298, 8 > %300 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %299) > %301 = call float @llvm.fma.f32(float %297, float %300, float %286) > %302 = fmul float %301, %95 > %303 = fmul float %302, 2.000000e+00 > %304 = fmul float %294, %95 > %305 = fmul float %304, 2.000000e+00 > %306 = shl i32 %113, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %307) > %309 = shl i32 %113, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %310) > %312 = shl i32 %113, 4 > %313 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %312) > %314 = shl i32 %113, 4 > %315 = or i32 %314, 12 > %316 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %315) > %317 = fmul float %311, %316 > %318 = fmul float %311, %313 > %319 = fmul float %308, %316 > %320 = shl i32 %113, 4 > %321 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %320) > %322 = shl i32 %113, 4 > %323 = or i32 %322, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %323) > %325 = call float @llvm.fma.f32(float %321, float %324, float %317) > %326 = fmul float %325, %95 > %327 = fmul float %326, 2.000000e+00 > %328 = shl i32 %113, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %328) > %330 = shl i32 %113, 4 > %331 = or i32 %330, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %331) > %333 = shl i32 %113, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %334) > %336 = shl i32 %113, 4 > %337 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %336) > %338 = shl i32 %113, 4 > %339 = or i32 %338, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> 
%53, i32 %339) > %341 = shl i32 %113, 4 > %342 = or i32 %341, 8 > %343 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %342) > %344 = fmul float %329, %337 > %345 = fmul float %332, %340 > %346 = fmul float %335, %343 > %347 = fadd float %346, %345 > %348 = fadd float %346, %344 > %349 = fadd float %345, %344 > %350 = fsub float -0.000000e+00, %347 > %351 = call float @llvm.fma.f32(float %350, float 2.000000e+00, float 1.000000e+00) > %352 = fsub float -0.000000e+00, %348 > %353 = call float @llvm.fma.f32(float %352, float 2.000000e+00, float 1.000000e+00) > %354 = fsub float -0.000000e+00, %349 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fmul float %95, %353 > %357 = fadd float %273, %327 > %358 = fadd float %274, %356 > %359 = fadd float %275, %305 > %360 = fadd float %276, %280 > %361 = fmul float %357, %57 > %362 = fmul float %358, %58 > %363 = fadd float %361, %362 > %364 = fmul float %359, %59 > %365 = fadd float %363, %364 > %366 = fadd float %365, %360 > %367 = shl i32 %109, 4 > %368 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %367) > %369 = shl i32 %109, 4 > %370 = or i32 %369, 8 > %371 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %370) > %372 = fsub float -0.000000e+00, %184 > %373 = call float @llvm.fma.f32(float %368, float %371, float %372) > %374 = fmul float %373, %93 > %375 = fmul float %374, 2.000000e+00 > %376 = fmul float %143, 2.000000e+00 > %377 = shl i32 %111, 4 > %378 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %377) > %379 = shl i32 %111, 4 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %380) > %382 = fsub float -0.000000e+00, %235 > %383 = call float @llvm.fma.f32(float %378, float %381, float %382) > %384 = fmul float %383, %94 > %385 = fmul float %384, 2.000000e+00 > %386 = fmul float %93, %220 > %387 = fmul float %93, %216 > %388 = fmul float %94, %271 > %389 = fmul float %94, %267 > %390 = shl i32 %105, 5 > %391 = or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %391) > %393 = fmul float %93, %392 > %394 = shl i32 %106, 5 > %395 = or i32 %394, 8 > %396 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %395) > %397 = fmul float %94, %396 > %398 = fadd float %385, %375 > %399 = fadd float %168, %376 > %400 = fadd float %388, %386 > %401 = fadd float %397, %393 > %402 = shl i32 %113, 4 > %403 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %402) > %404 = shl i32 %113, 4 > %405 = or i32 %404, 8 > %406 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %405) > %407 = fsub float -0.000000e+00, %319 > %408 = call float @llvm.fma.f32(float %403, float %406, float %407) > %409 = fmul float %408, %95 > %410 = fmul float %409, 2.000000e+00 > %411 = fmul float %95, %355 > %412 = fmul float %95, %351 > %413 = shl i32 %107, 5 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %414) > %416 = fmul float %95, %415 > %417 = fadd float %398, %410 > %418 = fadd float %399, %303 > %419 = fadd float %400, %411 > %420 = fadd float %401, %416 > %421 = fmul float %417, %57 > %422 = fmul float %418, %58 > %423 = fadd float %421, %422 > %424 = fmul float %419, %59 > %425 = fadd float %423, %424 > %426 = fadd float %425, %420 > %427 = shl i32 %105, 5 > %428 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %427) > %429 = fmul float %93, %428 > %430 = shl i32 %106, 5 > %431 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %430) > %432 = fmul float %94, %431 > %433 = shl i32 %107, 5 > %434 = call float 
@llvm.SI.load.const(<16 x i8> %53, i32 %433) > %435 = fmul float %95, %434 > %436 = shl i32 %109, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %436) > %438 = shl i32 %109, 4 > %439 = or i32 %438, 4 > %440 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %439) > %441 = fsub float -0.000000e+00, %182 > %442 = call float @llvm.fma.f32(float %437, float %440, float %441) > %443 = fadd float %184, %183 > %444 = fmul float %442, %93 > %445 = fmul float %443, %93 > %446 = fmul float %444, 2.000000e+00 > %447 = fmul float %445, 2.000000e+00 > %448 = shl i32 %111, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %448) > %450 = shl i32 %111, 4 > %451 = or i32 %450, 4 > %452 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %451) > %453 = fsub float -0.000000e+00, %233 > %454 = call float @llvm.fma.f32(float %449, float %452, float %453) > %455 = shl i32 %113, 4 > %456 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %455) > %457 = shl i32 %113, 4 > %458 = or i32 %457, 4 > %459 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %458) > %460 = fsub float -0.000000e+00, %317 > %461 = call float @llvm.fma.f32(float %456, float %459, float %460) > %462 = fadd float %319, %318 > %463 = fmul float %454, %94 > %464 = fmul float %461, %95 > %465 = fmul float %462, %95 > %466 = fmul float %464, 2.000000e+00 > %467 = fmul float %465, 2.000000e+00 > %468 = fadd float %235, %234 > %469 = fmul float %468, %94 > %470 = fmul float %463, 2.000000e+00 > %471 = fmul float %469, 2.000000e+00 > %472 = fadd float %387, %389 > %473 = fadd float %446, %470 > %474 = fadd float %447, %471 > %475 = fadd float %429, %432 > %476 = fadd float %412, %472 > %477 = fadd float %466, %473 > %478 = fadd float %467, %474 > %479 = fadd float %435, %475 > %480 = fmul float %476, %57 > %481 = fmul float %477, %58 > %482 = fadd float %480, %481 > %483 = fmul float %478, %59 > %484 = fadd float %482, %483 > %485 = fadd float %484, %479 > %486 = fmul float %23, %485 > %487 = fmul float %24, %366 > %488 = fadd float %486, %487 > %489 = fmul float %25, %426 > %490 = fadd float %488, %489 > %491 = fadd float %490, %26 > %492 = fmul float %27, %485 > %493 = fmul float %28, %366 > %494 = fadd float %492, %493 > %495 = fmul float %29, %426 > %496 = fadd float %494, %495 > %497 = fadd float %496, %30 > %498 = fmul float %31, %485 > %499 = fmul float %32, %366 > %500 = fadd float %498, %499 > %501 = fmul float %33, %426 > %502 = fadd float %500, %501 > %503 = fadd float %502, %34 > %504 = fmul float %35, %485 > %505 = fmul float %36, %366 > %506 = fadd float %504, %505 > %507 = fmul float %37, %426 > %508 = fadd float %506, %507 > %509 = fadd float %508, %38 > %510 = fmul float %357, %74 > %511 = fmul float %358, %75 > %512 = fadd float %511, %510 > %513 = fmul float %359, %76 > %514 = fadd float %512, %513 > %515 = fmul float %417, %74 > %516 = fmul float %418, %75 > %517 = fadd float %516, %515 > %518 = fmul float %419, %76 > %519 = fadd float %517, %518 > %520 = fmul float %476, %74 > %521 = fmul float %477, %75 > %522 = fadd float %521, %520 > %523 = fmul float %478, %76 > %524 = fadd float %522, %523 > %525 = fmul float %524, %524 > %526 = fmul float %514, %514 > %527 = fadd float %526, %525 > %528 = fmul float %519, %519 > %529 = fadd float %527, %528 > %530 = call float @llvm.AMDGPU.rsq.clamped.f32(float %529) > %531 = fmul float %530, %524 > %532 = fmul float %530, %514 > %533 = fmul float %530, %519 > %534 = fmul float %39, %531 > %535 = fmul float %40, %532 > %536 = fadd float %535, %534 > %537 = fmul 
float %41, %533 > %538 = fadd float %536, %537 > %539 = fmul float %357, %80 > %540 = fmul float %358, %81 > %541 = fadd float %540, %539 > %542 = fmul float %359, %82 > %543 = fadd float %541, %542 > %544 = fmul float %357, %63 > %545 = fmul float %358, %64 > %546 = fadd float %545, %544 > %547 = fmul float %359, %65 > %548 = fadd float %546, %547 > %549 = fmul float %417, %80 > %550 = fmul float %418, %81 > %551 = fadd float %550, %549 > %552 = fmul float %419, %82 > %553 = fadd float %551, %552 > %554 = fmul float %417, %63 > %555 = fmul float %418, %64 > %556 = fadd float %555, %554 > %557 = fmul float %419, %65 > %558 = fadd float %556, %557 > %559 = fmul float %476, %80 > %560 = fmul float %477, %81 > %561 = fadd float %560, %559 > %562 = fmul float %478, %82 > %563 = fadd float %561, %562 > %564 = fmul float %476, %63 > %565 = fmul float %477, %64 > %566 = fadd float %565, %564 > %567 = fmul float %478, %65 > %568 = fadd float %566, %567 > %569 = fmul float %563, %563 > %570 = fmul float %543, %543 > %571 = fadd float %570, %569 > %572 = fmul float %553, %553 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %563 > %576 = fmul float %574, %543 > %577 = fmul float %574, %553 > %578 = fmul float %39, %575 > %579 = fmul float %40, %576 > %580 = fadd float %579, %578 > %581 = fmul float %41, %577 > %582 = fadd float %580, %581 > %583 = fmul float %568, %568 > %584 = fmul float %548, %548 > %585 = fadd float %584, %583 > %586 = fmul float %558, %558 > %587 = fadd float %585, %586 > %588 = call float @llvm.AMDGPU.rsq.clamped.f32(float %587) > %589 = fmul float %588, %568 > %590 = fmul float %588, %548 > %591 = fmul float %588, %558 > %592 = fmul float %39, %589 > %593 = fmul float %40, %590 > %594 = fadd float %593, %592 > %595 = fmul float %41, %591 > %596 = fadd float %594, %595 > %597 = fmul float %42, %531 > %598 = fmul float %43, %532 > %599 = fadd float %598, %597 > %600 = fmul float %44, %533 > %601 = fadd float %599, %600 > %602 = fmul float %45, %531 > %603 = fmul float %46, %532 > %604 = fadd float %603, %602 > %605 = fmul float %47, %533 > %606 = fadd float %604, %605 > %607 = fmul float %42, %575 > %608 = fmul float %43, %576 > %609 = fadd float %608, %607 > %610 = fmul float %44, %577 > %611 = fadd float %609, %610 > %612 = fmul float %45, %575 > %613 = fmul float %46, %576 > %614 = fadd float %613, %612 > %615 = fmul float %47, %577 > %616 = fadd float %614, %615 > %617 = fmul float %42, %589 > %618 = fmul float %43, %590 > %619 = fadd float %618, %617 > %620 = fmul float %44, %591 > %621 = fadd float %619, %620 > %622 = fmul float %45, %589 > %623 = fmul float %46, %590 > %624 = fadd float %623, %622 > %625 = fmul float %47, %591 > %626 = fadd float %624, %625 > %627 = fmul float %89, %86 > %628 = fmul float %89, %87 > %629 = fmul float %89, %88 > %630 = fmul float %627, %48 > %631 = fmul float %628, %49 > %632 = fmul float %629, %50 > %633 = fmul float %89, %51 > %634 = bitcast i32 %11 to float > %635 = insertvalue <{ float, float, float }> undef, float %634, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %69, float %70, float %59, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %538, float %582, float %596, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %601, float %611, float %621, float %360) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %606, float %616, float %626, float 
undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %630, float %631, float %632, float %633) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %491, float %497, float %503, float %509) > ret <{ float, float, float }> %635 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 352, 0, 0} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: MOV TEMP[1].xy, IN[0].xyyy > 6: TEX TEMP[2], TEMP[1], SAMP[0], 2D > 7: ADD TEMP[3].x, TEMP[2].wwww, IMM[2].xxxx > 8: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[4].xyzz > 9: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 10: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 11: INEG TEMP[3].x, TEMP[3].xxxx > 12: USNE TEMP[1].x, TEMP[3].xxxx, IMM[0].xxxx > 13: AND TEMP[3].x, TEMP[1].xxxx, IMM[2].zzzz > 14: KILL_IF -TEMP[3].xxxx > 15: MOV TEMP[3].xy, IN[0].xyyy > 16: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 17: FMA TEMP[3].xy, TEMP[3].ywww, IMM[2].wwww, IMM[3].xxxx > 18: MOV TEMP[1].xy, TEMP[3].xyxx > 19: FMA TEMP[4].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[2].zzzz > 20: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[4].xxxx > 21: SQRT TEMP[3].x, TEMP[3].xxxx > 22: MOV TEMP[1].z, TEMP[3].xxxx > 23: DP3 TEMP[3].x, IN[1].xyzz, TEMP[1].xyzz > 24: DP3 TEMP[4].x, IN[2].xyzz, TEMP[1].xyzz > 25: MOV TEMP[3].y, TEMP[4].xxxx > 26: DP3 TEMP[4].x, IN[3].xyzz, TEMP[1].xyzz > 27: MOV TEMP[3].z, TEMP[4].xxxx > 28: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[3].xyzz > 29: RSQ TEMP[4].x, TEMP[1].xxxx > 30: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 31: MOV TEMP[3].xyz, -TEMP[1].xyzx > 32: USNE TEMP[4].x, TEMP[0].xxxx, IMM[0].xxxx > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].x, TEMP[1].xxxx > 35: ELSE :0 > 36: MOV TEMP[4].x, TEMP[3].xxxx > 37: ENDIF > 38: MOV TEMP[4].x, TEMP[4].xxxx > 39: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 40: UIF TEMP[5].xxxx :0 > 41: MOV TEMP[5].x, TEMP[1].yyyy > 42: ELSE :0 > 43: MOV TEMP[5].x, TEMP[3].yyyy > 44: ENDIF > 45: MOV TEMP[4].y, TEMP[5].xxxx > 46: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 47: UIF TEMP[0].xxxx :0 > 48: MOV TEMP[1].x, TEMP[1].zzzz > 49: ELSE :0 > 50: MOV TEMP[1].x, TEMP[3].zzzz > 51: 
ENDIF > 52: MOV TEMP[4].z, TEMP[1].xxxx > 53: FMA TEMP[1].xyz, TEMP[4].xyzz, IMM[3].yyyy, IMM[3].yyyy > 54: MOV TEMP[1].w, CONST[1][22].zzzz > 55: MOV TEMP[0].xy, IN[0].xyyy > 56: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 57: MOV TEMP[2].w, TEMP[0].wwww > 58: MUL TEMP[3].x, TEMP[0].zzzz, CONST[1][22].yyyy > 59: MOV TEMP[3].yz, TEMP[0].xyxx > 60: MOV TEMP[3].w, CONST[1][22].xxxx > 61: MOV OUT[0], TEMP[1] > 62: MOV OUT[1], TEMP[2] > 63: MOV OUT[2], TEMP[3] > 64: END >radeonsi: Compiling shader 181 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 11 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x 
i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %69 = icmp ne i32 %19, 0 > %. = select i1 %69, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %70 = bitcast float %55 to i32 > %71 = bitcast float %56 to i32 > %72 = insertelement <2 x i32> undef, i32 %70, i32 0 > %73 = insertelement <2 x i32> %72, i32 %71, i32 1 > %74 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %73, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %75 = extractelement <4 x float> %74, i32 0 > %76 = extractelement <4 x float> %74, i32 1 > %77 = extractelement <4 x float> %74, i32 2 > %78 = extractelement <4 x float> %74, i32 3 > %79 = fadd float %78, 0xBFD8181820000000 > %80 = fmul float %75, %66 > %81 = fmul float %76, %67 > %82 = fmul float %77, %68 > %83 = fcmp olt float %79, 0.000000e+00 > %84 = select i1 %83, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %84) > %85 = bitcast float %55 to i32 > %86 = bitcast float %56 to i32 > %87 = insertelement <2 x i32> undef, i32 %85, i32 0 > %88 = insertelement <2 x i32> %87, i32 %86, i32 1 > %89 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %88, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %90 = extractelement <4 x float> %89, i32 1 > %91 = extractelement <4 x float> %89, i32 3 > %92 = call float @llvm.fma.f32(float %90, float 2.000000e+00, float -1.000000e+00) > %93 = call float @llvm.fma.f32(float %91, float 2.000000e+00, float -1.000000e+00) > %94 = fsub float -0.000000e+00, %92 > %95 = call float @llvm.fma.f32(float %94, float %92, float 1.000000e+00) > %96 = fsub float -0.000000e+00, %93 > %97 = call float @llvm.fma.f32(float %96, float %93, float %95) > %98 = call float @llvm.sqrt.f32(float %97) > %99 = fmul float %57, %92 > %100 = fmul float %58, %93 > %101 = fadd float %100, %99 > %102 = fmul float %59, %98 > %103 = fadd float %101, %102 > %104 = fmul float %60, %92 > %105 = fmul float %61, %93 > %106 = fadd float %105, %104 > %107 = fmul float %62, %98 > %108 = fadd float %106, %107 > %109 = fmul float %63, %92 > %110 = fmul float %64, %93 > %111 = fadd float %110, %109 > %112 = fmul float %65, %98 > %113 = fadd float %111, %112 > %114 = fmul float %103, %103 > %115 = fmul float %108, %108 > %116 = fadd float %115, %114 > %117 = fmul float %113, %113 > %118 = fadd float %116, %117 > %119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118) > %120 = fmul float %119, %103 > %121 = fmul float %119, %108 > %122 = fmul float %119, %113 > %123 = fsub float -0.000000e+00, %120 > %124 = fsub float -0.000000e+00, %121 > %125 = fsub float -0.000000e+00, %122 > %126 = bitcast float %. to i32 > %127 = icmp ne i32 %126, 0 > %temp16.0 = select i1 %127, float %120, float %123 > %128 = bitcast float %. to i32 > %129 = icmp ne i32 %128, 0 > %.33 = select i1 %129, float %121, float %124 > %130 = bitcast float %. 
to i32 > %131 = icmp ne i32 %130, 0 > %temp4.0 = select i1 %131, float %122, float %125 > %132 = call float @llvm.fma.f32(float %temp16.0, float 5.000000e-01, float 5.000000e-01) > %133 = call float @llvm.fma.f32(float %.33, float 5.000000e-01, float 5.000000e-01) > %134 = call float @llvm.fma.f32(float %temp4.0, float 5.000000e-01, float 5.000000e-01) > %135 = bitcast float %55 to i32 > %136 = bitcast float %56 to i32 > %137 = insertelement <2 x i32> undef, i32 %135, i32 0 > %138 = insertelement <2 x i32> %137, i32 %136, i32 1 > %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %140 = extractelement <4 x float> %139, i32 0 > %141 = extractelement <4 x float> %139, i32 1 > %142 = extractelement <4 x float> %139, i32 2 > %143 = extractelement <4 x float> %139, i32 3 > %144 = fmul float %142, %26 > %145 = bitcast float %5 to i32 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %145, 10 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %132, 11 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %133, 12 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %134, 13 > %150 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149, float %27, 14 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %150, float %80, 15 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151, float %81, 16 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %82, 17 > %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %143, 18 > %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float %144, 19 > %156 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155, float %141, 20 > %157 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %156, float %140, 21 > %158 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %157, float %25, 22 > %159 = 
insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %158, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %159 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 352, 384, 368} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: ADD TEMP[0].x, CONST[1][22].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[1][22].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[3].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[1][22].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[1][22].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 17: FMA TEMP[2].x, TEMP[3].xxxx, TEMP[0].xxxx, IMM[2].wwww > 18: FMA TEMP[0].x, -TEMP[3].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 19: LG2 TEMP[3].x, TEMP[0].xxxx > 20: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][24].xxxx > 21: EX2 TEMP[3].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][23].wwww > 23: MUL TEMP[3].xyz, TEMP[0].xxxx, CONST[1][23].xyzz > 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 25: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 26: INEG TEMP[2].x, TEMP[2].xxxx > 27: USNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx > 28: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 29: KILL_IF -TEMP[1].xxxx > 30: MOV TEMP[3].w, IMM[1].wwww > 31: MOV TEMP[1].xy, IN[0].xyyy > 32: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 33: 
FMA TEMP[1].xy, TEMP[1].ywww, IMM[4].xxxx, IMM[1].zzzz > 34: MOV TEMP[0].xy, TEMP[1].xyxx > 35: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[1].xxxx > 36: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 37: SQRT TEMP[1].x, TEMP[1].xxxx > 38: MOV TEMP[0].z, TEMP[1].xxxx > 39: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 40: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 41: MOV TEMP[1].y, TEMP[2].xxxx > 42: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 43: MOV TEMP[1].z, TEMP[2].xxxx > 44: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 45: RSQ TEMP[2].x, TEMP[0].xxxx > 46: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 47: FMA TEMP[1].xyz, TEMP[0].xyzz, IMM[4].yyyy, IMM[4].yyyy > 48: MOV TEMP[1].w, CONST[1][24].zzzz > 49: MOV TEMP[0].xy, IN[0].xyyy > 50: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 51: MOV TEMP[2].w, IMM[1].wwww > 52: MOV TEMP[4].xy, IN[0].xyyy > 53: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D > 54: MUL TEMP[2].x, TEMP[4].zzzz, CONST[1][24].yyyy > 55: MOV TEMP[2].yz, TEMP[4].xyxx > 56: MOV OUT[0], TEMP[3] > 57: MOV OUT[1], TEMP[1] > 58: MOV OUT[2], TEMP[0] > 59: MOV OUT[3], TEMP[2] > 60: END >radeonsi: Compiling shader 182 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 
= extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %73 = fadd float %26, 1.000000e+00 > %74 = fsub float 1.000000e+00, %72 > %75 = fsub float 0x3FEFD70A40000000, %72 > %76 = call float @llvm.fma.f32(float %25, float %73, float %74) > %77 = call float @llvm.ceil.f32(float %75) > %78 = call float @llvm.AMDGPU.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) > %79 = fadd float %76, -1.000000e+00 > %80 = fcmp une float %26, 0.000000e+00 > %81 = fdiv float 1.000000e+00, %26 > %temp8.0 = select i1 %80, float %81, float 0x4600000000000000 > %82 = fmul float %temp8.0, %79 > %83 = call float @llvm.AMDGPU.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) > %84 = call float @llvm.fma.f32(float %83, float -2.000000e+00, float 3.000000e+00) > %85 = fmul float %83, %83 > %86 = fmul float %85, %84 > %87 = call float @llvm.fma.f32(float %78, float %86, float 0xBFD8181820000000) > %88 = fsub float -0.000000e+00, %78 > %89 = call float @llvm.fma.f32(float %88, float %86, float 1.000000e+00) > %90 = call float @llvm.log2.f32(float %89) > %91 = fmul float %90, %31 > %92 = call float @llvm.exp2.f32(float %91) > %93 = fmul float %92, %30 > %94 = fmul float %93, %27 > %95 = fmul float %93, %28 > %96 = fmul float %93, %29 > %97 = fcmp olt float %87, 0.000000e+00 > %98 = select i1 %97, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %98) > %99 = bitcast float %61 to i32 > %100 = bitcast float %62 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 1 > %105 = extractelement <4 x float> %103, i32 3 > %106 = call float @llvm.fma.f32(float %104, float 2.000000e+00, float -1.000000e+00) > %107 = call float @llvm.fma.f32(float %105, float 2.000000e+00, float -1.000000e+00) > %108 = fsub float -0.000000e+00, %106 > %109 = 
call float @llvm.fma.f32(float %108, float %106, float 1.000000e+00) > %110 = fsub float -0.000000e+00, %107 > %111 = call float @llvm.fma.f32(float %110, float %107, float %109) > %112 = call float @llvm.sqrt.f32(float %111) > %113 = fmul float %63, %106 > %114 = fmul float %64, %107 > %115 = fadd float %114, %113 > %116 = fmul float %65, %112 > %117 = fadd float %115, %116 > %118 = fmul float %66, %106 > %119 = fmul float %67, %107 > %120 = fadd float %119, %118 > %121 = fmul float %68, %112 > %122 = fadd float %120, %121 > %123 = fmul float %69, %106 > %124 = fmul float %70, %107 > %125 = fadd float %124, %123 > %126 = fmul float %71, %112 > %127 = fadd float %125, %126 > %128 = fmul float %117, %117 > %129 = fmul float %122, %122 > %130 = fadd float %129, %128 > %131 = fmul float %127, %127 > %132 = fadd float %130, %131 > %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) > %134 = fmul float %133, %117 > %135 = fmul float %133, %122 > %136 = fmul float %133, %127 > %137 = call float @llvm.fma.f32(float %134, float 5.000000e-01, float 5.000000e-01) > %138 = call float @llvm.fma.f32(float %135, float 5.000000e-01, float 5.000000e-01) > %139 = call float @llvm.fma.f32(float %136, float 5.000000e-01, float 5.000000e-01) > %140 = bitcast float %61 to i32 > %141 = bitcast float %62 to i32 > %142 = insertelement <2 x i32> undef, i32 %140, i32 0 > %143 = insertelement <2 x i32> %142, i32 %141, i32 1 > %144 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %143, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %145 = extractelement <4 x float> %144, i32 0 > %146 = extractelement <4 x float> %144, i32 1 > %147 = extractelement <4 x float> %144, i32 2 > %148 = extractelement <4 x float> %144, i32 3 > %149 = bitcast float %61 to i32 > %150 = bitcast float %62 to i32 > %151 = insertelement <2 x i32> undef, i32 %149, i32 0 > %152 = insertelement <2 x i32> %151, i32 %150, i32 1 > %153 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %152, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %154 = extractelement <4 x float> %153, i32 0 > %155 = extractelement <4 x float> %153, i32 1 > %156 = extractelement <4 x float> %153, i32 2 > %157 = fmul float %156, %32 > %158 = bitcast float %5 to i32 > %159 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %158, 10 > %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %159, float %94, 11 > %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %95, 12 > %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %96, 13 > %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162, float 0.000000e+00, 14 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> %163, float %137, 15 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %138, 16 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %139, 17 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %33, 18 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %145, 19 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %146, 20 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %147, 21 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %148, 22 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %157, 23 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %155, 24 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %154, 25 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float 0.000000e+00, 26 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float 
@llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 352, 384, 368} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: ADD TEMP[1].x, CONST[1][22].yyyy, IMM[2].xxxx > 6: MOV TEMP[2].xy, IN[0].xyyy > 7: TEX TEMP[3].w, TEMP[2], SAMP[0], 2D > 8: ADD TEMP[3].xy, -TEMP[3].wwww, IMM[2].xyyy > 9: FMA TEMP[4].x, CONST[1][22].xxxx, TEMP[1].xxxx, TEMP[3].xxxx > 10: CEIL TEMP[3].x, TEMP[3].yyyy > 11: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 12: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].zzzz > 13: FSNE TEMP[4].x, CONST[1][22].yyyy, IMM[2].wwww > 14: UIF TEMP[4].xxxx :0 > 15: RCP TEMP[4].x, CONST[1][22].yyyy > 16: ELSE :0 > 17: MOV TEMP[4].x, IMM[3].xxxx > 18: ENDIF > 19: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx > 20: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 21: FMA TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy, IMM[3].zzzz > 22: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[4].xxxx > 23: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx > 24: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx > 25: FMA TEMP[1].x, -TEMP[3].xxxx, TEMP[1].xxxx, IMM[2].xxxx > 26: LG2 TEMP[3].x, TEMP[1].xxxx > 27: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][24].xxxx > 28: EX2 TEMP[3].x, TEMP[1].xxxx > 29: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][23].wwww > 30: MUL TEMP[3].xyz, TEMP[1].xxxx, CONST[1][23].xyzz > 31: MOV TEMP[5].xy, IN[0].xyyy > 32: TEX TEMP[5], TEMP[5], SAMP[1], 2D > 33: MOV TEMP[6].xyz, TEMP[5].xyzx > 34: FMA TEMP[4].x, TEMP[4].xxxx, TEMP[5].wwww, IMM[3].wwww > 35: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww > 36: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz > 37: INEG TEMP[4].x, TEMP[4].xxxx > 38: USNE TEMP[2].x, TEMP[4].xxxx, IMM[0].xxxx > 39: AND TEMP[4].x, TEMP[2].xxxx, IMM[2].xxxx > 40: KILL_IF -TEMP[4].xxxx > 41: MOV TEMP[3].w, IMM[2].wwww > 42: MOV TEMP[4].xy, IN[0].xyyy > 43: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D > 44: FMA TEMP[4].xy, TEMP[4].ywww, IMM[4].xxxx, IMM[2].zzzz > 45: MOV TEMP[1].xy, TEMP[4].xyxx > 46: FMA TEMP[5].x, -TEMP[4].xxxx, TEMP[4].xxxx, IMM[2].xxxx > 47: FMA TEMP[4].x, -TEMP[4].yyyy, TEMP[4].yyyy, TEMP[5].xxxx > 48: SQRT TEMP[4].x, TEMP[4].xxxx > 49: 
MOV TEMP[1].z, TEMP[4].xxxx > 50: DP3 TEMP[2].x, IN[1].xyzz, TEMP[1].xyzz > 51: DP3 TEMP[4].x, IN[2].xyzz, TEMP[1].xyzz > 52: MOV TEMP[2].y, TEMP[4].xxxx > 53: DP3 TEMP[4].x, IN[3].xyzz, TEMP[1].xyzz > 54: MOV TEMP[2].z, TEMP[4].xxxx > 55: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[2].xyzz > 56: RSQ TEMP[4].x, TEMP[1].xxxx > 57: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[2].xyzz > 58: MOV TEMP[2].xyz, -TEMP[1].xyzx > 59: USNE TEMP[4].x, TEMP[0].xxxx, IMM[0].xxxx > 60: UIF TEMP[4].xxxx :0 > 61: MOV TEMP[4].x, TEMP[1].xxxx > 62: ELSE :0 > 63: MOV TEMP[4].x, TEMP[2].xxxx > 64: ENDIF > 65: MOV TEMP[4].x, TEMP[4].xxxx > 66: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 67: UIF TEMP[5].xxxx :0 > 68: MOV TEMP[5].x, TEMP[1].yyyy > 69: ELSE :0 > 70: MOV TEMP[5].x, TEMP[2].yyyy > 71: ENDIF > 72: MOV TEMP[4].y, TEMP[5].xxxx > 73: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 74: UIF TEMP[0].xxxx :0 > 75: MOV TEMP[0].x, TEMP[1].zzzz > 76: ELSE :0 > 77: MOV TEMP[0].x, TEMP[2].zzzz > 78: ENDIF > 79: MOV TEMP[4].z, TEMP[0].xxxx > 80: FMA TEMP[2].xyz, TEMP[4].xyzz, IMM[4].yyyy, IMM[4].yyyy > 81: MOV TEMP[2].w, CONST[1][24].wwww > 82: MOV TEMP[0].xy, IN[0].xyyy > 83: TEX TEMP[0], TEMP[0], SAMP[3], 2D > 84: MOV TEMP[6].w, TEMP[0].wwww > 85: MUL TEMP[1].x, TEMP[0].zzzz, CONST[1][24].zzzz > 86: MOV TEMP[1].yz, TEMP[0].xyxx > 87: MOV TEMP[1].w, CONST[1][24].yyyy > 88: MOV OUT[0], TEMP[3] > 89: MOV OUT[1], TEMP[2] > 90: MOV OUT[2], TEMP[6] > 91: MOV OUT[3], TEMP[1] > 92: END >radeonsi: Compiling shader 183 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 396) > %35 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 > %37 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %38 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %37, i64 0, i64 3 > %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !tbaa !0 > %40 = extractelement <8 x i32> %36, i32 7 > %41 = extractelement <4 x i32> %39, i32 0 > %42 = and i32 %41, %40 > %43 = insertelement <4 x i32> %39, i32 %42, i32 0 > %44 = getelementptr [32 x <8 x 
i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 > %46 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %47 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %46, i64 0, i64 7 > %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 > %49 = extractelement <8 x i32> %45, i32 7 > %50 = extractelement <4 x i32> %48, i32 0 > %51 = and i32 %50, %49 > %52 = insertelement <4 x i32> %48, i32 %51, i32 0 > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 11 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 15 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %82 = icmp ne i32 %19, 0 > %. 
= select i1 %82, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %83 = fadd float %26, 1.000000e+00 > %84 = bitcast float %71 to i32 > %85 = bitcast float %72 to i32 > %86 = insertelement <2 x i32> undef, i32 %84, i32 0 > %87 = insertelement <2 x i32> %86, i32 %85, i32 1 > %88 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %87, <8 x i32> %36, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %89 = extractelement <4 x float> %88, i32 3 > %90 = fsub float 1.000000e+00, %89 > %91 = fsub float 0x3FEFD70A40000000, %89 > %92 = call float @llvm.fma.f32(float %25, float %83, float %90) > %93 = call float @llvm.ceil.f32(float %91) > %94 = call float @llvm.AMDGPU.clamp.(float %93, float 0.000000e+00, float 1.000000e+00) > %95 = fadd float %92, -1.000000e+00 > %96 = fcmp une float %26, 0.000000e+00 > %97 = fdiv float 1.000000e+00, %26 > %temp16.0 = select i1 %96, float %97, float 0x4600000000000000 > %98 = fmul float %temp16.0, %95 > %99 = call float @llvm.AMDGPU.clamp.(float %98, float 0.000000e+00, float 1.000000e+00) > %100 = call float @llvm.fma.f32(float %99, float -2.000000e+00, float 3.000000e+00) > %101 = fmul float %99, %99 > %102 = fmul float %101, %100 > %103 = fmul float %102, %94 > %104 = fsub float -0.000000e+00, %94 > %105 = call float @llvm.fma.f32(float %104, float %102, float 1.000000e+00) > %106 = call float @llvm.log2.f32(float %105) > %107 = fmul float %106, %31 > %108 = call float @llvm.exp2.f32(float %107) > %109 = fmul float %108, %30 > %110 = fmul float %109, %27 > %111 = fmul float %109, %28 > %112 = fmul float %109, %29 > %113 = bitcast float %71 to i32 > %114 = bitcast float %72 to i32 > %115 = insertelement <2 x i32> undef, i32 %113, i32 0 > %116 = insertelement <2 x i32> %115, i32 %114, i32 1 > %117 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %116, <8 x i32> %45, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %118 = extractelement <4 x float> %117, i32 0 > %119 = extractelement <4 x float> %117, i32 1 > %120 = extractelement <4 x float> %117, i32 2 > %121 = extractelement <4 x float> %117, i32 3 > %122 = call float @llvm.fma.f32(float %103, float %121, float 0xBFD8181820000000) > %123 = fcmp olt float %122, 0.000000e+00 > %124 = select i1 %123, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %124) > %125 = bitcast float %71 to i32 > %126 = bitcast float %72 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 1 > %131 = extractelement <4 x float> %129, i32 3 > %132 = call float @llvm.fma.f32(float %130, float 2.000000e+00, float -1.000000e+00) > %133 = call float @llvm.fma.f32(float %131, float 2.000000e+00, float -1.000000e+00) > %134 = fsub float -0.000000e+00, %132 > %135 = call float @llvm.fma.f32(float %134, float %132, float 1.000000e+00) > %136 = fsub float -0.000000e+00, %133 > %137 = call float @llvm.fma.f32(float %136, float %133, float %135) > %138 = call float @llvm.sqrt.f32(float %137) > %139 = fmul float %73, %132 > %140 = fmul float %74, %133 > %141 = fadd float %140, %139 > %142 = fmul float %75, %138 > %143 = fadd float %141, %142 > %144 = fmul float %76, %132 > %145 = fmul float %77, %133 > %146 = fadd float %145, %144 > %147 = fmul float %78, %138 > %148 = fadd float %146, %147 > %149 = 
fmul float %79, %132 > %150 = fmul float %80, %133 > %151 = fadd float %150, %149 > %152 = fmul float %81, %138 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %148, %148 > %156 = fadd float %155, %154 > %157 = fmul float %153, %153 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %148 > %162 = fmul float %159, %153 > %163 = fsub float -0.000000e+00, %160 > %164 = fsub float -0.000000e+00, %161 > %165 = fsub float -0.000000e+00, %162 > %166 = bitcast float %. to i32 > %167 = icmp ne i32 %166, 0 > %.40 = select i1 %167, float %160, float %163 > %168 = bitcast float %. to i32 > %169 = icmp ne i32 %168, 0 > %temp20.0 = select i1 %169, float %161, float %164 > %170 = bitcast float %. to i32 > %171 = icmp ne i32 %170, 0 > %.41 = select i1 %171, float %162, float %165 > %172 = call float @llvm.fma.f32(float %.40, float 5.000000e-01, float 5.000000e-01) > %173 = call float @llvm.fma.f32(float %temp20.0, float 5.000000e-01, float 5.000000e-01) > %174 = call float @llvm.fma.f32(float %.41, float 5.000000e-01, float 5.000000e-01) > %175 = bitcast float %71 to i32 > %176 = bitcast float %72 to i32 > %177 = insertelement <2 x i32> undef, i32 %175, i32 0 > %178 = insertelement <2 x i32> %177, i32 %176, i32 1 > %179 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %178, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %180 = extractelement <4 x float> %179, i32 0 > %181 = extractelement <4 x float> %179, i32 1 > %182 = extractelement <4 x float> %179, i32 2 > %183 = extractelement <4 x float> %179, i32 3 > %184 = fmul float %182, %33 > %185 = bitcast float %5 to i32 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %185, 10 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float %110, 11 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float %111, 12 > %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float %112, 13 > %190 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189, float 0.000000e+00, 14 > %191 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %190, float %172, 15 > %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %191, float %173, 16 > %193 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192, float %174, 17 > %194 = 
insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %34, 18 > %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194, float %118, 19 > %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %119, 20 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float %120, 21 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197, float %183, 22 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %184, 23 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %181, 24 > %201 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200, float %180, 25 > %202 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %201, float %32, 26 > %203 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %202, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %203 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > 
prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 352, 384, 368} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: ADD TEMP[1].x, CONST[1][22].yyyy, IMM[2].xxxx > 6: ADD TEMP[2].xy, -IN[4].wwww, IMM[2].xyyy > 7: FMA TEMP[3].x, CONST[1][22].xxxx, TEMP[1].xxxx, TEMP[2].xxxx > 8: CEIL TEMP[4].x, TEMP[2].yyyy > 9: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 10: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].zzzz > 11: FSNE TEMP[3].x, CONST[1][22].yyyy, IMM[2].wwww > 12: UIF TEMP[3].xxxx :0 > 13: RCP TEMP[3].x, CONST[1][22].yyyy > 14: ELSE :0 > 15: MOV TEMP[3].x, IMM[3].xxxx > 16: ENDIF > 17: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[1].xxxx > 18: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 19: FMA TEMP[5].x, TEMP[3].xxxx, IMM[3].yyyy, IMM[3].zzzz > 20: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[3].xxxx > 21: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx > 22: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx > 23: FMA TEMP[1].x, -TEMP[4].xxxx, TEMP[1].xxxx, IMM[2].xxxx > 24: LG2 TEMP[4].x, TEMP[1].xxxx > 25: MUL TEMP[1].x, TEMP[4].xxxx, CONST[1][24].xxxx > 26: EX2 TEMP[4].x, TEMP[1].xxxx > 27: MUL TEMP[1].x, TEMP[4].xxxx, CONST[1][23].wwww > 28: MUL TEMP[4].xyz, TEMP[1].xxxx, CONST[1][23].xyzz > 29: MOV TEMP[5].xy, IN[0].xyyy > 30: TEX TEMP[5], TEMP[5], SAMP[0], 2D > 31: MOV TEMP[6].xyz, TEMP[5].xyzx > 32: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[5].wwww, IMM[3].wwww > 33: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww > 34: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 35: INEG TEMP[3].x, TEMP[3].xxxx > 36: USNE TEMP[2].x, TEMP[3].xxxx, IMM[0].xxxx > 37: AND TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx > 38: KILL_IF -TEMP[3].xxxx > 39: MOV TEMP[4].w, IMM[2].wwww > 40: MOV TEMP[3].xy, IN[0].xyyy > 41: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 42: FMA TEMP[3].xy, TEMP[3].ywww, IMM[4].xxxx, IMM[2].zzzz > 43: MOV TEMP[1].xy, TEMP[3].xyxx > 44: FMA TEMP[5].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[2].xxxx > 45: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[5].xxxx > 46: SQRT TEMP[3].x, TEMP[3].xxxx > 47: MOV TEMP[1].z, TEMP[3].xxxx > 48: DP3 TEMP[2].x, IN[1].xyzz, TEMP[1].xyzz > 49: DP3 TEMP[3].x, IN[2].xyzz, TEMP[1].xyzz > 50: MOV TEMP[2].y, TEMP[3].xxxx > 51: DP3 TEMP[3].x, IN[3].xyzz, TEMP[1].xyzz > 52: MOV TEMP[2].z, TEMP[3].xxxx > 53: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[2].xyzz > 54: RSQ TEMP[3].x, TEMP[1].xxxx > 55: MUL TEMP[1].xyz, TEMP[3].xxxx, TEMP[2].xyzz > 56: MOV TEMP[2].xyz, -TEMP[1].xyzx > 57: USNE TEMP[3].x, TEMP[0].xxxx, IMM[0].xxxx > 58: UIF TEMP[3].xxxx :0 > 59: MOV TEMP[3].x, TEMP[1].xxxx > 60: ELSE :0 > 61: MOV TEMP[3].x, TEMP[2].xxxx > 62: ENDIF > 63: MOV TEMP[3].x, TEMP[3].xxxx > 64: USNE TEMP[5].x, TEMP[0].xxxx, 
IMM[0].xxxx > 65: UIF TEMP[5].xxxx :0 > 66: MOV TEMP[5].x, TEMP[1].yyyy > 67: ELSE :0 > 68: MOV TEMP[5].x, TEMP[2].yyyy > 69: ENDIF > 70: MOV TEMP[3].y, TEMP[5].xxxx > 71: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 72: UIF TEMP[0].xxxx :0 > 73: MOV TEMP[0].x, TEMP[1].zzzz > 74: ELSE :0 > 75: MOV TEMP[0].x, TEMP[2].zzzz > 76: ENDIF > 77: MOV TEMP[3].z, TEMP[0].xxxx > 78: FMA TEMP[2].xyz, TEMP[3].xyzz, IMM[4].yyyy, IMM[4].yyyy > 79: MOV TEMP[2].w, CONST[1][24].wwww > 80: MOV TEMP[0].xy, IN[0].xyyy > 81: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 82: MOV TEMP[6].w, TEMP[0].wwww > 83: MUL TEMP[1].x, TEMP[0].zzzz, CONST[1][24].zzzz > 84: MOV TEMP[1].yz, TEMP[0].xyxx > 85: MOV TEMP[1].w, CONST[1][24].yyyy > 86: MOV OUT[0], TEMP[4] > 87: MOV OUT[1], TEMP[2] > 88: MOV OUT[2], TEMP[6] > 89: MOV OUT[3], TEMP[1] > 90: END >radeonsi: Compiling shader 184 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 396) > %35 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 > %37 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %38 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %37, i64 0, i64 3 > %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !tbaa !0 > %40 = extractelement <8 x i32> %36, i32 7 > %41 = extractelement <4 x i32> %39, i32 0 > %42 = and i32 %41, %40 > %43 = insertelement <4 x i32> %39, i32 %42, i32 0 > %44 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 > %46 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %47 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %46, i64 0, i64 7 > %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 > %49 = extractelement <8 x i32> %45, i32 7 > %50 = extractelement <4 x i32> %48, i32 0 > %51 = and i32 %50, %49 > %52 = insertelement <4 x i32> %48, i32 %51, i32 0 > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %54 = load <8 x i32>, 
<8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 11 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %74 = icmp ne i32 %19, 0 > %. = select i1 %74, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %75 = fadd float %26, 1.000000e+00 > %76 = fsub float 1.000000e+00, %73 > %77 = fsub float 0x3FEFD70A40000000, %73 > %78 = call float @llvm.fma.f32(float %25, float %75, float %76) > %79 = call float @llvm.ceil.f32(float %77) > %80 = call float @llvm.AMDGPU.clamp.(float %79, float 0.000000e+00, float 1.000000e+00) > %81 = fadd float %78, -1.000000e+00 > %82 = fcmp une float %26, 0.000000e+00 > %83 = fdiv float 1.000000e+00, %26 > %temp12.0 = select i1 %82, float %83, float 0x4600000000000000 > %84 = fmul float %temp12.0, %81 > %85 = call float @llvm.AMDGPU.clamp.(float %84, float 0.000000e+00, float 1.000000e+00) > %86 = call float @llvm.fma.f32(float %85, float -2.000000e+00, float 3.000000e+00) > %87 = fmul float %85, %85 > %88 = fmul float %87, %86 > %89 = fmul float %88, %80 > %90 = fsub float -0.000000e+00, %80 > %91 = call float @llvm.fma.f32(float %90, float %88, float 1.000000e+00) > %92 = call float @llvm.log2.f32(float %91) > %93 = fmul float %92, %31 > %94 = call float @llvm.exp2.f32(float %93) > %95 = fmul float %94, %30 > %96 = fmul float %95, %27 > %97 = fmul float %95, %28 > %98 = fmul float %95, %29 > %99 = bitcast float %62 to i32 > %100 = bitcast float %63 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %36, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = extractelement <4 x float> %103, i32 3 > %108 = call float @llvm.fma.f32(float %89, float %107, float 0xBFD8181820000000) > %109 = fcmp olt float %108, 0.000000e+00 > %110 = select i1 %109, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %110) > %111 = bitcast float %62 to i32 > %112 = bitcast float %63 to i32 > %113 = insertelement <2 x i32> undef, i32 %111, i32 0 > %114 = insertelement <2 x i32> %113, i32 %112, i32 1 > %115 = call <4 x float> 
@llvm.SI.image.sample.v2i32(<2 x i32> %114, <8 x i32> %45, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %116 = extractelement <4 x float> %115, i32 1 > %117 = extractelement <4 x float> %115, i32 3 > %118 = call float @llvm.fma.f32(float %116, float 2.000000e+00, float -1.000000e+00) > %119 = call float @llvm.fma.f32(float %117, float 2.000000e+00, float -1.000000e+00) > %120 = fsub float -0.000000e+00, %118 > %121 = call float @llvm.fma.f32(float %120, float %118, float 1.000000e+00) > %122 = fsub float -0.000000e+00, %119 > %123 = call float @llvm.fma.f32(float %122, float %119, float %121) > %124 = call float @llvm.sqrt.f32(float %123) > %125 = fmul float %64, %118 > %126 = fmul float %65, %119 > %127 = fadd float %126, %125 > %128 = fmul float %66, %124 > %129 = fadd float %127, %128 > %130 = fmul float %67, %118 > %131 = fmul float %68, %119 > %132 = fadd float %131, %130 > %133 = fmul float %69, %124 > %134 = fadd float %132, %133 > %135 = fmul float %70, %118 > %136 = fmul float %71, %119 > %137 = fadd float %136, %135 > %138 = fmul float %72, %124 > %139 = fadd float %137, %138 > %140 = fmul float %129, %129 > %141 = fmul float %134, %134 > %142 = fadd float %141, %140 > %143 = fmul float %139, %139 > %144 = fadd float %142, %143 > %145 = call float @llvm.AMDGPU.rsq.clamped.f32(float %144) > %146 = fmul float %145, %129 > %147 = fmul float %145, %134 > %148 = fmul float %145, %139 > %149 = fsub float -0.000000e+00, %146 > %150 = fsub float -0.000000e+00, %147 > %151 = fsub float -0.000000e+00, %148 > %152 = bitcast float %. to i32 > %153 = icmp ne i32 %152, 0 > %.40 = select i1 %153, float %146, float %149 > %154 = bitcast float %. to i32 > %155 = icmp ne i32 %154, 0 > %temp20.0 = select i1 %155, float %147, float %150 > %156 = bitcast float %. 
to i32 > %157 = icmp ne i32 %156, 0 > %.41 = select i1 %157, float %148, float %151 > %158 = call float @llvm.fma.f32(float %.40, float 5.000000e-01, float 5.000000e-01) > %159 = call float @llvm.fma.f32(float %temp20.0, float 5.000000e-01, float 5.000000e-01) > %160 = call float @llvm.fma.f32(float %.41, float 5.000000e-01, float 5.000000e-01) > %161 = bitcast float %62 to i32 > %162 = bitcast float %63 to i32 > %163 = insertelement <2 x i32> undef, i32 %161, i32 0 > %164 = insertelement <2 x i32> %163, i32 %162, i32 1 > %165 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %164, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %166 = extractelement <4 x float> %165, i32 0 > %167 = extractelement <4 x float> %165, i32 1 > %168 = extractelement <4 x float> %165, i32 2 > %169 = extractelement <4 x float> %165, i32 3 > %170 = fmul float %168, %33 > %171 = bitcast float %5 to i32 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %171, 10 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %96, 11 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %97, 12 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float %98, 13 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float 0.000000e+00, 14 > %177 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176, float %158, 15 > %178 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %177, float %159, 16 > %179 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %178, float %160, 17 > %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %179, float %34, 18 > %181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %104, 19 > %182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %105, 20 > %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float, float }> %182, float %106, 21 > %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %169, 22 > %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float %170, 23 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float %167, 24 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float %166, 25 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float %32, 26 > %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: 
USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, 
IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR 
TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL 
TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, 
TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: 
MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 185 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = 
extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float 
@llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or 
i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 
= or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, 
%416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = 
fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x 
i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store 
float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, 
SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT 
TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: 
MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx 
>275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, 
TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 186 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = 
"amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float @llvm.SI.load.const(<16 x i8> %37, i32 372) > 
%59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load 
i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > 
store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > %399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x 
i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float addrspace(3)* > %480 = load float, float addrspace(3)* 
%479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x 
i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = 
call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* %848, align 4 > %850 = fsub float %849, %838 > %851 = fmul 
float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. 
> br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > 
%1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = 
and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 
%1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 
> %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = 
select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label 
%ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > 
store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > 
%1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 
0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = 
call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy 
> 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 187 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 
= shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, 
%181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float 
@llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 
262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, 
%446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = 
shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl 
i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ 
float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 384, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][24].yyyy > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MOV TEMP[2].xy, IN[0].xyyy > 21: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 22: MUL TEMP[3].x, TEMP[2].zzzz, CONST[1][24].xxxx > 23: MOV TEMP[3].yzw, TEMP[2].xyxw > 24: MOV OUT[0], TEMP[0] > 25: MOV OUT[1], TEMP[1] > 26: MOV OUT[2], TEMP[3] > 27: END >radeonsi: Compiling shader 188 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 
x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = bitcast float %54 to i32 > %66 = bitcast float %55 to i32 > %67 = insertelement <2 x i32> undef, i32 %65, i32 0 > %68 = insertelement <2 x i32> %67, i32 %66, i32 1 > %69 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %68, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %70 = extractelement <4 x float> %69, i32 1 > %71 = extractelement <4 x float> %69, i32 3 > %72 = call float @llvm.fma.f32(float %70, float 2.000000e+00, float -1.000000e+00) > %73 = call float @llvm.fma.f32(float %71, float 2.000000e+00, float -1.000000e+00) > %74 = fsub float -0.000000e+00, %72 > %75 = call float @llvm.fma.f32(float %74, float %72, float 1.000000e+00) > %76 = fsub float -0.000000e+00, %73 > %77 = call float @llvm.fma.f32(float %76, float %73, float 
%75) > %78 = call float @llvm.sqrt.f32(float %77) > %79 = fmul float %56, %72 > %80 = fmul float %57, %73 > %81 = fadd float %80, %79 > %82 = fmul float %58, %78 > %83 = fadd float %81, %82 > %84 = fmul float %59, %72 > %85 = fmul float %60, %73 > %86 = fadd float %85, %84 > %87 = fmul float %61, %78 > %88 = fadd float %86, %87 > %89 = fmul float %62, %72 > %90 = fmul float %63, %73 > %91 = fadd float %90, %89 > %92 = fmul float %64, %78 > %93 = fadd float %91, %92 > %94 = fmul float %83, %83 > %95 = fmul float %88, %88 > %96 = fadd float %95, %94 > %97 = fmul float %93, %93 > %98 = fadd float %96, %97 > %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) > %100 = fmul float %99, %83 > %101 = fmul float %99, %88 > %102 = fmul float %99, %93 > %103 = call float @llvm.fma.f32(float %100, float 5.000000e-01, float 5.000000e-01) > %104 = call float @llvm.fma.f32(float %101, float 5.000000e-01, float 5.000000e-01) > %105 = call float @llvm.fma.f32(float %102, float 5.000000e-01, float 5.000000e-01) > %106 = bitcast float %54 to i32 > %107 = bitcast float %55 to i32 > %108 = insertelement <2 x i32> undef, i32 %106, i32 0 > %109 = insertelement <2 x i32> %108, i32 %107, i32 1 > %110 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %109, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %111 = extractelement <4 x float> %110, i32 0 > %112 = extractelement <4 x float> %110, i32 1 > %113 = extractelement <4 x float> %110, i32 2 > %114 = extractelement <4 x float> %110, i32 3 > %115 = bitcast float %54 to i32 > %116 = bitcast float %55 to i32 > %117 = insertelement <2 x i32> undef, i32 %115, i32 0 > %118 = insertelement <2 x i32> %117, i32 %116, i32 1 > %119 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %118, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %120 = extractelement <4 x float> %119, i32 0 > %121 = extractelement <4 x float> %119, i32 1 > %122 = extractelement <4 x float> %119, i32 2 > %123 = extractelement <4 x float> %119, i32 3 > %124 = fmul float %122, %25 > %125 = bitcast float %5 to i32 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %125, 10 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float %103, 11 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %104, 12 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %105, 13 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %26, 14 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %111, 15 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %112, 16 > %133 = insertvalue <{ 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %113, 17 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %114, 18 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %124, 19 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %121, 20 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %120, 21 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %123, 22 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 189 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 
0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..51] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 784, 800, 816} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][49] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][50] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][51] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][49] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][50] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][51] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][49] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][50] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][51] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, 
TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: 
UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx 
>236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 190 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 804) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 808) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 812) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 816) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 820) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 824) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 828) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul 
nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw 
i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 
255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = 
fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 
> %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > 
%501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float 
%302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 
%temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 
%temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float 
@llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..51] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 784, 800, 816} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][49] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][50] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][51] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 
191 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 804) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 808) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 812) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 816) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 820) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 824) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 828) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = 
call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 
1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = 
and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, 
IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 
81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, 
IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, 
TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, 
TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx 
>401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 192 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 
x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x 
i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call 
float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = 
or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = 
call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float 
%513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > 
%623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 
%615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, 
SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, 
TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG 
TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, 
TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD 
TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 193 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> 
%18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 
%56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw 
nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > 
%268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* 
%352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = 
zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 
> %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub 
float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 
%714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. > br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw 
i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float 
addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > %898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 
%977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 %7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > 
%1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > %1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call 
float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float %1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = 
fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select 
i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 > %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = 
call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to 
i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, 
i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float 
%1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY 
TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 
77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 194 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float 
@llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > 
%125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > 
>ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 
510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 
%382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load 
float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > 
%575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 
12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > 
epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 384, 0, 0} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: MOV TEMP[1].xy, IN[0].xyyy > 6: TEX TEMP[2], TEMP[1], SAMP[0], 2D > 7: ADD TEMP[3].x, TEMP[2].wwww, IMM[2].xxxx > 8: MOV TEMP[2].xyz, TEMP[2].xyzx > 9: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 10: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 11: INEG TEMP[3].x, TEMP[3].xxxx > 12: USNE TEMP[1].x, TEMP[3].xxxx, IMM[0].xxxx > 13: AND TEMP[3].x, TEMP[1].xxxx, IMM[2].zzzz > 14: KILL_IF -TEMP[3].xxxx > 15: MOV TEMP[3].xy, IN[0].xyyy > 16: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 17: FMA TEMP[3].xy, TEMP[3].ywww, IMM[2].wwww, IMM[3].xxxx > 18: MOV TEMP[1].xy, TEMP[3].xyxx > 19: FMA TEMP[4].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[2].zzzz > 20: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[4].xxxx > 21: SQRT TEMP[3].x, TEMP[3].xxxx > 22: MOV TEMP[1].z, TEMP[3].xxxx > 23: DP3 TEMP[3].x, IN[1].xyzz, TEMP[1].xyzz > 24: DP3 TEMP[4].x, IN[2].xyzz, TEMP[1].xyzz > 25: MOV TEMP[3].y, TEMP[4].xxxx > 26: DP3 TEMP[4].x, IN[3].xyzz, TEMP[1].xyzz > 27: MOV TEMP[3].z, TEMP[4].xxxx > 28: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[3].xyzz > 29: RSQ TEMP[4].x, TEMP[1].xxxx > 30: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 31: MOV TEMP[3].xyz, -TEMP[1].xyzx > 32: USNE TEMP[4].x, TEMP[0].xxxx, IMM[0].xxxx > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].x, TEMP[1].xxxx > 35: ELSE :0 > 36: MOV TEMP[4].x, TEMP[3].xxxx > 37: ENDIF > 38: MOV TEMP[4].x, TEMP[4].xxxx > 39: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 40: UIF TEMP[5].xxxx :0 > 41: MOV TEMP[5].x, TEMP[1].yyyy > 42: ELSE :0 > 43: MOV TEMP[5].x, TEMP[3].yyyy > 44: ENDIF > 45: MOV TEMP[4].y, TEMP[5].xxxx > 46: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 47: UIF TEMP[0].xxxx :0 > 48: MOV TEMP[1].x, TEMP[1].zzzz > 49: ELSE :0 > 50: MOV TEMP[1].x, TEMP[3].zzzz > 51: ENDIF > 52: MOV TEMP[4].z, TEMP[1].xxxx > 53: FMA TEMP[1].xyz, TEMP[4].xyzz, IMM[3].yyyy, IMM[3].yyyy > 54: MOV TEMP[1].w, CONST[1][24].yyyy > 55: MOV TEMP[2].w, IMM[2].zzzz > 56: MOV TEMP[0].xy, IN[0].xyyy > 57: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 58: MUL TEMP[3].x, TEMP[0].zzzz, CONST[1][24].xxxx > 59: MOV TEMP[3].yzw, TEMP[0].xyxw > 60: MOV OUT[0], TEMP[1] > 61: MOV OUT[1], TEMP[2] > 62: MOV OUT[2], TEMP[3] > 63: END >radeonsi: Compiling shader 195 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] 
addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = icmp ne i32 %19, 0 > %. 
= select i1 %65, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %66 = bitcast float %54 to i32 > %67 = bitcast float %55 to i32 > %68 = insertelement <2 x i32> undef, i32 %66, i32 0 > %69 = insertelement <2 x i32> %68, i32 %67, i32 1 > %70 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %69, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = fadd float %74, 0xBFD8181820000000 > %76 = fcmp olt float %75, 0.000000e+00 > %77 = select i1 %76, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %77) > %78 = bitcast float %54 to i32 > %79 = bitcast float %55 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 1 > %84 = extractelement <4 x float> %82, i32 3 > %85 = call float @llvm.fma.f32(float %83, float 2.000000e+00, float -1.000000e+00) > %86 = call float @llvm.fma.f32(float %84, float 2.000000e+00, float -1.000000e+00) > %87 = fsub float -0.000000e+00, %85 > %88 = call float @llvm.fma.f32(float %87, float %85, float 1.000000e+00) > %89 = fsub float -0.000000e+00, %86 > %90 = call float @llvm.fma.f32(float %89, float %86, float %88) > %91 = call float @llvm.sqrt.f32(float %90) > %92 = fmul float %56, %85 > %93 = fmul float %57, %86 > %94 = fadd float %93, %92 > %95 = fmul float %58, %91 > %96 = fadd float %94, %95 > %97 = fmul float %59, %85 > %98 = fmul float %60, %86 > %99 = fadd float %98, %97 > %100 = fmul float %61, %91 > %101 = fadd float %99, %100 > %102 = fmul float %62, %85 > %103 = fmul float %63, %86 > %104 = fadd float %103, %102 > %105 = fmul float %64, %91 > %106 = fadd float %104, %105 > %107 = fmul float %96, %96 > %108 = fmul float %101, %101 > %109 = fadd float %108, %107 > %110 = fmul float %106, %106 > %111 = fadd float %109, %110 > %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) > %113 = fmul float %112, %96 > %114 = fmul float %112, %101 > %115 = fmul float %112, %106 > %116 = fsub float -0.000000e+00, %113 > %117 = fsub float -0.000000e+00, %114 > %118 = fsub float -0.000000e+00, %115 > %119 = bitcast float %. to i32 > %120 = icmp ne i32 %119, 0 > %temp16.0 = select i1 %120, float %113, float %116 > %121 = bitcast float %. to i32 > %122 = icmp ne i32 %121, 0 > %.33 = select i1 %122, float %114, float %117 > %123 = bitcast float %. 
to i32 > %124 = icmp ne i32 %123, 0 > %temp4.0 = select i1 %124, float %115, float %118 > %125 = call float @llvm.fma.f32(float %temp16.0, float 5.000000e-01, float 5.000000e-01) > %126 = call float @llvm.fma.f32(float %.33, float 5.000000e-01, float 5.000000e-01) > %127 = call float @llvm.fma.f32(float %temp4.0, float 5.000000e-01, float 5.000000e-01) > %128 = bitcast float %54 to i32 > %129 = bitcast float %55 to i32 > %130 = insertelement <2 x i32> undef, i32 %128, i32 0 > %131 = insertelement <2 x i32> %130, i32 %129, i32 1 > %132 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %131, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %133 = extractelement <4 x float> %132, i32 0 > %134 = extractelement <4 x float> %132, i32 1 > %135 = extractelement <4 x float> %132, i32 2 > %136 = extractelement <4 x float> %132, i32 3 > %137 = fmul float %135, %25 > %138 = bitcast float %5 to i32 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %138, 10 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %125, 11 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %126, 12 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %127, 13 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %26, 14 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %71, 15 > %145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %72, 16 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %73, 17 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float 1.000000e+00, 18 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %137, 19 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %134, 20 > %150 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149, float %133, 21 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %150, float %136, 22 > 
%152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 
40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, 
IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR 
TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, 
TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, 
TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 196 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, 
i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi 
float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float 
@llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl 
i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float 
%128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = 
fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = 
fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = 
add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, 
i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, 
TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT 
TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: 
MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy 
>313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, 
TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 197 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = 
call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float @llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > 
%103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 
%266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw 
i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > %399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 
8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float 
@llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 
to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 
%7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* %848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, 
i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. > br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext 
i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > %1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 
addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 
%1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 %1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = 
fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 > %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float 
%1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq 
float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float 
addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 
x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > %1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float 
%1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, 
TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 
TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 198 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x 
i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 
= call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 
0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, 
i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 
> %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to 
float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = 
and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, 
%673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > %723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 
> %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float %850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, 
float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..26] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 384, 416, 400} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: ADD TEMP[0].x, CONST[1][24].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[1][24].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[3].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[1][24].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[1][24].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 17: FMA TEMP[2].x, TEMP[3].xxxx, 
TEMP[0].xxxx, IMM[2].wwww > 18: FMA TEMP[0].x, -TEMP[3].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 19: LG2 TEMP[3].x, TEMP[0].xxxx > 20: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][26].xxxx > 21: EX2 TEMP[3].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][25].wwww > 23: MUL TEMP[3].xyz, TEMP[0].xxxx, CONST[1][25].xyzz > 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 25: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 26: INEG TEMP[2].x, TEMP[2].xxxx > 27: USNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx > 28: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 29: KILL_IF -TEMP[1].xxxx > 30: MOV TEMP[3].w, IMM[1].wwww > 31: MOV TEMP[1].xy, IN[0].xyyy > 32: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 33: FMA TEMP[1].xy, TEMP[1].ywww, IMM[4].xxxx, IMM[1].zzzz > 34: MOV TEMP[0].xy, TEMP[1].xyxx > 35: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[1].xxxx > 36: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 37: SQRT TEMP[1].x, TEMP[1].xxxx > 38: MOV TEMP[0].z, TEMP[1].xxxx > 39: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 40: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 41: MOV TEMP[1].y, TEMP[2].xxxx > 42: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 43: MOV TEMP[1].z, TEMP[2].xxxx > 44: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 45: RSQ TEMP[2].x, TEMP[0].xxxx > 46: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 47: FMA TEMP[1].xyz, TEMP[0].xyzz, IMM[4].yyyy, IMM[4].yyyy > 48: MOV TEMP[1].w, CONST[1][26].zzzz > 49: MOV TEMP[0].xy, IN[0].xyyy > 50: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 51: MOV TEMP[2].xy, IN[0].xyyy > 52: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 53: MUL TEMP[4].x, TEMP[2].zzzz, CONST[1][26].yyyy > 54: MOV TEMP[4].yzw, TEMP[2].xyxw > 55: MOV OUT[0], TEMP[3] > 56: MOV OUT[1], TEMP[1] > 57: MOV OUT[2], TEMP[0] > 58: MOV OUT[3], TEMP[4] > 59: END >radeonsi: Compiling shader 199 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> 
addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %73 = fadd float %26, 1.000000e+00 > %74 = fsub float 1.000000e+00, %72 > %75 = fsub float 0x3FEFD70A40000000, %72 > %76 = call float @llvm.fma.f32(float %25, float %73, float %74) > %77 = call float @llvm.ceil.f32(float %75) > %78 = call float @llvm.AMDGPU.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) > %79 = fadd float %76, -1.000000e+00 > %80 = fcmp une float %26, 0.000000e+00 > %81 = fdiv float 1.000000e+00, %26 > %temp8.0 = select i1 %80, float %81, float 0x4600000000000000 > %82 = fmul float %temp8.0, %79 > %83 = call float @llvm.AMDGPU.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) > %84 = call float @llvm.fma.f32(float %83, float -2.000000e+00, float 3.000000e+00) > %85 = fmul float %83, %83 > %86 = fmul float %85, %84 > %87 = call float @llvm.fma.f32(float %78, float %86, float 0xBFD8181820000000) > %88 = fsub float -0.000000e+00, %78 > %89 = call float @llvm.fma.f32(float %88, float %86, float 1.000000e+00) > %90 = call float @llvm.log2.f32(float %89) > %91 = fmul float %90, %31 > %92 = call float @llvm.exp2.f32(float %91) > %93 = fmul float %92, %30 > %94 = fmul float %93, %27 > %95 = fmul float %93, %28 > %96 = fmul float %93, %29 > %97 = fcmp olt float %87, 0.000000e+00 > %98 = select i1 %97, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float 
%98) > %99 = bitcast float %61 to i32 > %100 = bitcast float %62 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 1 > %105 = extractelement <4 x float> %103, i32 3 > %106 = call float @llvm.fma.f32(float %104, float 2.000000e+00, float -1.000000e+00) > %107 = call float @llvm.fma.f32(float %105, float 2.000000e+00, float -1.000000e+00) > %108 = fsub float -0.000000e+00, %106 > %109 = call float @llvm.fma.f32(float %108, float %106, float 1.000000e+00) > %110 = fsub float -0.000000e+00, %107 > %111 = call float @llvm.fma.f32(float %110, float %107, float %109) > %112 = call float @llvm.sqrt.f32(float %111) > %113 = fmul float %63, %106 > %114 = fmul float %64, %107 > %115 = fadd float %114, %113 > %116 = fmul float %65, %112 > %117 = fadd float %115, %116 > %118 = fmul float %66, %106 > %119 = fmul float %67, %107 > %120 = fadd float %119, %118 > %121 = fmul float %68, %112 > %122 = fadd float %120, %121 > %123 = fmul float %69, %106 > %124 = fmul float %70, %107 > %125 = fadd float %124, %123 > %126 = fmul float %71, %112 > %127 = fadd float %125, %126 > %128 = fmul float %117, %117 > %129 = fmul float %122, %122 > %130 = fadd float %129, %128 > %131 = fmul float %127, %127 > %132 = fadd float %130, %131 > %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) > %134 = fmul float %133, %117 > %135 = fmul float %133, %122 > %136 = fmul float %133, %127 > %137 = call float @llvm.fma.f32(float %134, float 5.000000e-01, float 5.000000e-01) > %138 = call float @llvm.fma.f32(float %135, float 5.000000e-01, float 5.000000e-01) > %139 = call float @llvm.fma.f32(float %136, float 5.000000e-01, float 5.000000e-01) > %140 = bitcast float %61 to i32 > %141 = bitcast float %62 to i32 > %142 = insertelement <2 x i32> undef, i32 %140, i32 0 > %143 = insertelement <2 x i32> %142, i32 %141, i32 1 > %144 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %143, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %145 = extractelement <4 x float> %144, i32 0 > %146 = extractelement <4 x float> %144, i32 1 > %147 = extractelement <4 x float> %144, i32 2 > %148 = extractelement <4 x float> %144, i32 3 > %149 = bitcast float %61 to i32 > %150 = bitcast float %62 to i32 > %151 = insertelement <2 x i32> undef, i32 %149, i32 0 > %152 = insertelement <2 x i32> %151, i32 %150, i32 1 > %153 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %152, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %154 = extractelement <4 x float> %153, i32 0 > %155 = extractelement <4 x float> %153, i32 1 > %156 = extractelement <4 x float> %153, i32 2 > %157 = extractelement <4 x float> %153, i32 3 > %158 = fmul float %156, %32 > %159 = bitcast float %5 to i32 > %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %159, 10 > %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %94, 11 > %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %95, 12 > %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162, float %96, 13 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %163, float 0.000000e+00, 14 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %137, 15 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %138, 16 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %139, 17 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %33, 18 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %145, 19 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %146, 20 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %147, 21 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %148, 22 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %158, 23 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %155, 24 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float %154, 25 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %157, 26 > %177 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float, float }> %177 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 
39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, 
IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL 
TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, 
CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 
TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 200 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) 
> %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 
to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 
= or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 
= fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float 
%359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > 
%454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 
= fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float 
addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, 
TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV 
TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz 
>221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP 
TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, 
TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 201 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = 
mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = 
add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = 
and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 
%276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > 
%391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, 
%449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = 
mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp 
olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. 
> br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 
addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > 
%898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 
%7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > 
%1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float 
%1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge 
float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 
> %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = 
and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw 
nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 
16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float 
@llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz 
> 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 202 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > 
%42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, 
align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi 
float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add 
i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 
to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = 
call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > 
%577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, 
%666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > %723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, 
float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 > %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float 
%850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..26] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 384, 416, 400} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: ADD TEMP[1].x, CONST[1][24].yyyy, IMM[2].xxxx > 6: ADD TEMP[2].xy, -IN[4].wwww, IMM[2].xyyy > 7: FMA TEMP[3].x, CONST[1][24].xxxx, TEMP[1].xxxx, 
TEMP[2].xxxx > 8: CEIL TEMP[4].x, TEMP[2].yyyy > 9: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 10: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].zzzz > 11: FSNE TEMP[3].x, CONST[1][24].yyyy, IMM[2].wwww > 12: UIF TEMP[3].xxxx :0 > 13: RCP TEMP[3].x, CONST[1][24].yyyy > 14: ELSE :0 > 15: MOV TEMP[3].x, IMM[3].xxxx > 16: ENDIF > 17: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[1].xxxx > 18: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 19: FMA TEMP[5].x, TEMP[3].xxxx, IMM[3].yyyy, IMM[3].zzzz > 20: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[3].xxxx > 21: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx > 22: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx > 23: FMA TEMP[1].x, -TEMP[4].xxxx, TEMP[1].xxxx, IMM[2].xxxx > 24: LG2 TEMP[4].x, TEMP[1].xxxx > 25: MUL TEMP[1].x, TEMP[4].xxxx, CONST[1][26].xxxx > 26: EX2 TEMP[4].x, TEMP[1].xxxx > 27: MUL TEMP[1].x, TEMP[4].xxxx, CONST[1][25].wwww > 28: MUL TEMP[4].xyz, TEMP[1].xxxx, CONST[1][25].xyzz > 29: MOV TEMP[5].xy, IN[0].xyyy > 30: TEX TEMP[5], TEMP[5], SAMP[0], 2D > 31: MOV TEMP[6].xyz, TEMP[5].xyzx > 32: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[5].wwww, IMM[3].wwww > 33: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww > 34: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 35: INEG TEMP[3].x, TEMP[3].xxxx > 36: USNE TEMP[2].x, TEMP[3].xxxx, IMM[0].xxxx > 37: AND TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx > 38: KILL_IF -TEMP[3].xxxx > 39: MOV TEMP[4].w, IMM[2].wwww > 40: MOV TEMP[3].xy, IN[0].xyyy > 41: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 42: FMA TEMP[3].xy, TEMP[3].ywww, IMM[4].xxxx, IMM[2].zzzz > 43: MOV TEMP[1].xy, TEMP[3].xyxx > 44: FMA TEMP[5].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[2].xxxx > 45: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[5].xxxx > 46: SQRT TEMP[3].x, TEMP[3].xxxx > 47: MOV TEMP[1].z, TEMP[3].xxxx > 48: DP3 TEMP[2].x, IN[1].xyzz, TEMP[1].xyzz > 49: DP3 TEMP[3].x, IN[2].xyzz, TEMP[1].xyzz > 50: MOV TEMP[2].y, TEMP[3].xxxx > 51: DP3 TEMP[3].x, IN[3].xyzz, TEMP[1].xyzz > 52: MOV TEMP[2].z, TEMP[3].xxxx > 53: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[2].xyzz > 54: RSQ TEMP[3].x, TEMP[1].xxxx > 55: MUL TEMP[1].xyz, TEMP[3].xxxx, TEMP[2].xyzz > 56: MOV TEMP[2].xyz, -TEMP[1].xyzx > 57: USNE TEMP[3].x, TEMP[0].xxxx, IMM[0].xxxx > 58: UIF TEMP[3].xxxx :0 > 59: MOV TEMP[3].x, TEMP[1].xxxx > 60: ELSE :0 > 61: MOV TEMP[3].x, TEMP[2].xxxx > 62: ENDIF > 63: MOV TEMP[3].x, TEMP[3].xxxx > 64: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 65: UIF TEMP[5].xxxx :0 > 66: MOV TEMP[5].x, TEMP[1].yyyy > 67: ELSE :0 > 68: MOV TEMP[5].x, TEMP[2].yyyy > 69: ENDIF > 70: MOV TEMP[3].y, TEMP[5].xxxx > 71: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 72: UIF TEMP[0].xxxx :0 > 73: MOV TEMP[0].x, TEMP[1].zzzz > 74: ELSE :0 > 75: MOV TEMP[0].x, TEMP[2].zzzz > 76: ENDIF > 77: MOV TEMP[3].z, TEMP[0].xxxx > 78: FMA TEMP[2].xyz, TEMP[3].xyzz, IMM[4].yyyy, IMM[4].yyyy > 79: MOV TEMP[2].w, CONST[1][26].zzzz > 80: MOV TEMP[6].w, IMM[2].xxxx > 81: MOV TEMP[0].xy, IN[0].xyyy > 82: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 83: MUL TEMP[1].x, TEMP[0].zzzz, CONST[1][26].yyyy > 84: MOV TEMP[1].yzw, TEMP[0].xyxw > 85: MOV OUT[0], TEMP[4] > 86: MOV OUT[1], TEMP[2] > 87: MOV OUT[2], TEMP[6] > 88: MOV OUT[3], TEMP[1] > 89: END >radeonsi: Compiling shader 203 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, 
[16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, 
<2 x i32> %8) > %73 = icmp ne i32 %19, 0 > %. = select i1 %73, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %74 = fadd float %26, 1.000000e+00 > %75 = fsub float 1.000000e+00, %72 > %76 = fsub float 0x3FEFD70A40000000, %72 > %77 = call float @llvm.fma.f32(float %25, float %74, float %75) > %78 = call float @llvm.ceil.f32(float %76) > %79 = call float @llvm.AMDGPU.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) > %80 = fadd float %77, -1.000000e+00 > %81 = fcmp une float %26, 0.000000e+00 > %82 = fdiv float 1.000000e+00, %26 > %temp12.0 = select i1 %81, float %82, float 0x4600000000000000 > %83 = fmul float %temp12.0, %80 > %84 = call float @llvm.AMDGPU.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) > %85 = call float @llvm.fma.f32(float %84, float -2.000000e+00, float 3.000000e+00) > %86 = fmul float %84, %84 > %87 = fmul float %86, %85 > %88 = fmul float %87, %79 > %89 = fsub float -0.000000e+00, %79 > %90 = call float @llvm.fma.f32(float %89, float %87, float 1.000000e+00) > %91 = call float @llvm.log2.f32(float %90) > %92 = fmul float %91, %31 > %93 = call float @llvm.exp2.f32(float %92) > %94 = fmul float %93, %30 > %95 = fmul float %94, %27 > %96 = fmul float %94, %28 > %97 = fmul float %94, %29 > %98 = bitcast float %61 to i32 > %99 = bitcast float %62 to i32 > %100 = insertelement <2 x i32> undef, i32 %98, i32 0 > %101 = insertelement <2 x i32> %100, i32 %99, i32 1 > %102 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %101, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %103 = extractelement <4 x float> %102, i32 0 > %104 = extractelement <4 x float> %102, i32 1 > %105 = extractelement <4 x float> %102, i32 2 > %106 = extractelement <4 x float> %102, i32 3 > %107 = call float @llvm.fma.f32(float %88, float %106, float 0xBFD8181820000000) > %108 = fcmp olt float %107, 0.000000e+00 > %109 = select i1 %108, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %109) > %110 = bitcast float %61 to i32 > %111 = bitcast float %62 to i32 > %112 = insertelement <2 x i32> undef, i32 %110, i32 0 > %113 = insertelement <2 x i32> %112, i32 %111, i32 1 > %114 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %113, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %115 = extractelement <4 x float> %114, i32 1 > %116 = extractelement <4 x float> %114, i32 3 > %117 = call float @llvm.fma.f32(float %115, float 2.000000e+00, float -1.000000e+00) > %118 = call float @llvm.fma.f32(float %116, float 2.000000e+00, float -1.000000e+00) > %119 = fsub float -0.000000e+00, %117 > %120 = call float @llvm.fma.f32(float %119, float %117, float 1.000000e+00) > %121 = fsub float -0.000000e+00, %118 > %122 = call float @llvm.fma.f32(float %121, float %118, float %120) > %123 = call float @llvm.sqrt.f32(float %122) > %124 = fmul float %63, %117 > %125 = fmul float %64, %118 > %126 = fadd float %125, %124 > %127 = fmul float %65, %123 > %128 = fadd float %126, %127 > %129 = fmul float %66, %117 > %130 = fmul float %67, %118 > %131 = fadd float %130, %129 > %132 = fmul float %68, %123 > %133 = fadd float %131, %132 > %134 = fmul float %69, %117 > %135 = fmul float %70, %118 > %136 = fadd float %135, %134 > %137 = fmul float %71, %123 > %138 = fadd float %136, %137 > %139 = fmul float %128, %128 > %140 = fmul float %133, %133 > %141 = fadd float %140, %139 > %142 = fmul float %138, %138 > %143 = fadd float %141, %142 > %144 = call float @llvm.AMDGPU.rsq.clamped.f32(float %143) > 
%145 = fmul float %144, %128 > %146 = fmul float %144, %133 > %147 = fmul float %144, %138 > %148 = fsub float -0.000000e+00, %145 > %149 = fsub float -0.000000e+00, %146 > %150 = fsub float -0.000000e+00, %147 > %151 = bitcast float %. to i32 > %152 = icmp ne i32 %151, 0 > %.40 = select i1 %152, float %145, float %148 > %153 = bitcast float %. to i32 > %154 = icmp ne i32 %153, 0 > %temp20.0 = select i1 %154, float %146, float %149 > %155 = bitcast float %. to i32 > %156 = icmp ne i32 %155, 0 > %.41 = select i1 %156, float %147, float %150 > %157 = call float @llvm.fma.f32(float %.40, float 5.000000e-01, float 5.000000e-01) > %158 = call float @llvm.fma.f32(float %temp20.0, float 5.000000e-01, float 5.000000e-01) > %159 = call float @llvm.fma.f32(float %.41, float 5.000000e-01, float 5.000000e-01) > %160 = bitcast float %61 to i32 > %161 = bitcast float %62 to i32 > %162 = insertelement <2 x i32> undef, i32 %160, i32 0 > %163 = insertelement <2 x i32> %162, i32 %161, i32 1 > %164 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %163, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %165 = extractelement <4 x float> %164, i32 0 > %166 = extractelement <4 x float> %164, i32 1 > %167 = extractelement <4 x float> %164, i32 2 > %168 = extractelement <4 x float> %164, i32 3 > %169 = fmul float %167, %32 > %170 = bitcast float %5 to i32 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %170, 10 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %95, 11 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %96, 12 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %97, 13 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float 0.000000e+00, 14 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %157, 15 > %177 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176, float %158, 16 > %178 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %177, float %159, 17 > %179 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %178, float %33, 18 > %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float }> %179, float %103, 19 > %181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %104, 20 > %182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %105, 21 > %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %182, float 1.000000e+00, 22 > %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %169, 23 > %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float %166, 24 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float %165, 25 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float %168, 26 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] 
>DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[2].xy, IN[2].xyxx > 8: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 9: MOV TEMP[0].w, IN[5].wwww > 10: MUL TEMP[0], TEMP[0], CONST[1][10] > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 12: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 13: MOV TEMP[3].y, TEMP[4].xxxx > 14: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 15: MOV TEMP[3].z, TEMP[4].xxxx > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 17: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 18: MOV TEMP[4].y, TEMP[5].xxxx > 19: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 20: MOV TEMP[4].z, TEMP[5].xxxx > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 22: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 23: MOV TEMP[5].y, TEMP[6].xxxx > 24: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 25: MOV TEMP[5].z, TEMP[6].xxxx > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[0] > 30: MOV OUT[1], TEMP[2] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 204 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 124) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 140) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 156) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x 
i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = extractelement <4 x float> %45, i32 2 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %15) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %16) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %17) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %18) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %21, %40 > %72 = fmul float %22, %41 > %73 = fadd float %71, %72 > %74 = fmul float %23, %42 > %75 = fadd float %73, %74 > %76 = fadd float %75, %24 > %77 = fmul float %25, %40 > %78 = fmul float %26, %41 > %79 = fadd float %77, %78 > %80 = fmul float %27, %42 > %81 = fadd float %79, %80 > %82 = fadd float %81, %28 > %83 = fmul float %29, %40 > %84 = fmul float %30, %41 > %85 = fadd float %83, %84 > %86 = fmul float %31, %42 > %87 = fadd float %85, %86 > %88 = fadd float %87, %32 > %89 = fmul float %70, %67 > %90 = fmul float %70, %68 > %91 = fmul float %70, %69 > %92 = fmul float %89, %33 > %93 = fmul float %90, %34 > %94 = fmul float %91, %35 > %95 = fmul float %70, %36 > %96 = fmul float %21, %55 > %97 = fmul float %22, %56 > %98 = fadd float %97, %96 > %99 = fmul float %23, %57 > %100 = fadd float %98, %99 > %101 = fmul float %21, %61 > %102 = fmul float %22, %62 > %103 = fadd float %102, %101 > %104 = fmul float %23, %63 > %105 = fadd float %103, %104 > %106 = fmul float %21, %46 > %107 = fmul float %22, %47 > %108 = fadd float %107, %106 > %109 = fmul float %23, %48 > %110 = fadd float %108, %109 > %111 = fmul float %25, %55 > %112 = fmul float %26, %56 > %113 = fadd float %112, %111 > %114 = fmul float %27, %57 > %115 = fadd float %113, %114 > %116 = fmul float %25, %61 > %117 = fmul float %26, %62 > %118 = fadd float %117, %116 > %119 = fmul float %27, %63 > %120 = fadd float %118, %119 > %121 = fmul float %25, %46 > %122 = fmul float %26, %47 > %123 = fadd float %122, %121 > %124 = fmul float %27, %48 > %125 = fadd float %123, %124 > %126 = fmul float %29, %55 > %127 = fmul float %30, %56 > %128 = fadd float %127, %126 > %129 = fmul float %31, %57 > %130 = fadd float %128, %129 > %131 = fmul float %29, %61 > %132 = fmul float %30, %62 > %133 = fadd float %132, %131 > %134 = fmul float %31, %63 > %135 = fadd 
float %133, %134 > %136 = fmul float %29, %46 > %137 = fmul float %30, %47 > %138 = fadd float %137, %136 > %139 = fmul float %31, %48 > %140 = fadd float %138, %139 > %141 = lshr i32 %8, 13 > %142 = and i32 %141, 255 > %143 = mul i32 %142, %10 > %144 = add i32 %143, 16 > %145 = sext i32 %144 to i64 > %146 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %145 > %147 = bitcast i32 addrspace(3)* %146 to float addrspace(3)* > store float %76, float addrspace(3)* %147, align 4 > %148 = add i32 %143, 17 > %149 = sext i32 %148 to i64 > %150 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %149 > %151 = bitcast i32 addrspace(3)* %150 to float addrspace(3)* > store float %82, float addrspace(3)* %151, align 4 > %152 = add i32 %143, 18 > %153 = sext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = bitcast i32 addrspace(3)* %154 to float addrspace(3)* > store float %88, float addrspace(3)* %155, align 4 > %156 = add i32 %143, 20 > %bc = bitcast <4 x float> %51 to <4 x i32> > %157 = extractelement <4 x i32> %bc, i32 0 > %158 = sext i32 %156 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %157, i32 addrspace(3)* %159, align 4 > %160 = add i32 %143, 21 > %bc28 = bitcast <4 x float> %51 to <4 x i32> > %161 = extractelement <4 x i32> %bc28, i32 1 > %162 = sext i32 %160 to i64 > %163 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %162 > store i32 %161, i32 addrspace(3)* %163, align 4 > %164 = add i32 %143, 24 > %165 = sext i32 %164 to i64 > %166 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %165 > %167 = bitcast i32 addrspace(3)* %166 to float addrspace(3)* > store float %92, float addrspace(3)* %167, align 4 > %168 = add i32 %143, 25 > %169 = sext i32 %168 to i64 > %170 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %169 > %171 = bitcast i32 addrspace(3)* %170 to float addrspace(3)* > store float %93, float addrspace(3)* %171, align 4 > %172 = add i32 %143, 26 > %173 = sext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > %175 = bitcast i32 addrspace(3)* %174 to float addrspace(3)* > store float %94, float addrspace(3)* %175, align 4 > %176 = add i32 %143, 27 > %177 = sext i32 %176 to i64 > %178 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %177 > %179 = bitcast i32 addrspace(3)* %178 to float addrspace(3)* > store float %95, float addrspace(3)* %179, align 4 > %180 = add i32 %143, 28 > %181 = sext i32 %180 to i64 > %182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %181 > %183 = bitcast i32 addrspace(3)* %182 to float addrspace(3)* > store float %100, float addrspace(3)* %183, align 4 > %184 = add i32 %143, 29 > %185 = sext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > %187 = bitcast i32 addrspace(3)* %186 to float addrspace(3)* > store float %105, float addrspace(3)* %187, align 4 > %188 = add i32 %143, 30 > %189 = sext i32 %188 to i64 > %190 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %189 > %191 = bitcast i32 addrspace(3)* %190 to float addrspace(3)* > store float %110, float addrspace(3)* %191, align 4 > %192 = add i32 %143, 32 > %193 = sext i32 %192 to i64 > %194 = getelementptr [8320 x i32], [8320 
x i32] addrspace(3)* @tess_lds, i64 0, i64 %193 > %195 = bitcast i32 addrspace(3)* %194 to float addrspace(3)* > store float %115, float addrspace(3)* %195, align 4 > %196 = add i32 %143, 33 > %197 = sext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = bitcast i32 addrspace(3)* %198 to float addrspace(3)* > store float %120, float addrspace(3)* %199, align 4 > %200 = add i32 %143, 34 > %201 = sext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = bitcast i32 addrspace(3)* %202 to float addrspace(3)* > store float %125, float addrspace(3)* %203, align 4 > %204 = add i32 %143, 36 > %205 = sext i32 %204 to i64 > %206 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %205 > %207 = bitcast i32 addrspace(3)* %206 to float addrspace(3)* > store float %130, float addrspace(3)* %207, align 4 > %208 = add i32 %143, 37 > %209 = sext i32 %208 to i64 > %210 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %209 > %211 = bitcast i32 addrspace(3)* %210 to float addrspace(3)* > store float %135, float addrspace(3)* %211, align 4 > %212 = add i32 %143, 38 > %213 = sext i32 %212 to i64 > %214 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %213 > %215 = bitcast i32 addrspace(3)* %214 to float addrspace(3)* > store float %140, float addrspace(3)* %215, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL 
ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy 
>117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx 
>202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, 
TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: 
MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 205 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float 
@llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw 
i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and 
i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 
13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw 
nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = 
lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call 
float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. 
> br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 
addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > 
%898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 
%7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > 
%1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float 
%1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge 
float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 
> %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = 
and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw 
nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 
16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float 
@llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, 
TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 206 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > 
%45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = 
add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv 
float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float 
addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > 
%390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 
%480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 
addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: 
MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 207 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float 
%55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..56] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 864, 880, 896} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][54] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][55] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][56] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][54] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][55] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][56] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][54] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][55] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][56] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, 
CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx 
>129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN 
TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 208 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 864) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 868) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 872) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 876) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 880) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 884) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 888) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 892) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 
x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext 
i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, 
%20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 
= fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 
x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > 
%484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, 
align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > 
%633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = 
lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > 
br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..56] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 864, 880, 896} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][54] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][55] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][56] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, 
IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 209 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 864) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 868) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 872) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 876) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 880) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 884) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 888) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 892) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, 
%9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, 
%133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > 
%230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM 
failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 210 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 
addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..58] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 896, 912, 928} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][56] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][57] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][58] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][56] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][57] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 
26: MOV TEMP[5], CONST[1][58] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][56] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][57] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][58] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND 
TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, 
IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 211 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
912) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 916) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 920) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 924) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 928) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 932) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 936) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 940) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw 
i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 
%7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 
= and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 
= and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext 
i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = 
fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..58] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 896, 912, 928} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][56] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][57] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][58] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP 
TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 212 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 912) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 916) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 920) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 924) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 928) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 932) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 936) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 940) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = 
shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > 
%142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 
1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, 
float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..3] >DCL CONST[2][0..25] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 32, 1, 400} >IMM[2] UINT32 {48, 16, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][2], TEMP[0] > 3: MIN TEMP[2].x, TEMP[1].xxxx, CONST[2][25].zzzz > 4: MOV TEMP[2].z, TEMP[2].xxxx > 5: MOV TEMP[3].z, TEMP[1].xxxx > 6: DP4 TEMP[1].x, CONST[1][3], TEMP[0] > 7: MOV TEMP[2].w, TEMP[1].xxxx > 8: MOV TEMP[3].w, TEMP[1].xxxx > 9: DP4 TEMP[2].x, CONST[1][0], TEMP[0] > 10: DP4 TEMP[0].x, CONST[1][1], TEMP[0] > 11: MOV TEMP[2].y, TEMP[0].xxxx > 12: MOV TEMP[3].xy, IN[1].xyxx > 13: MOV OUT[1], TEMP[3] > 14: MOV OUT[0], TEMP[2] > 15: END >radeonsi: Compiling shader 213 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call float @llvm.SI.load.const(<16 x i8> %34, i32 408) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %37 = load <16 x i8>, <16 x i8> 
addrspace(2)* %36, align 16, !tbaa !0 > %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %13) > %39 = extractelement <4 x float> %38, i32 0 > %40 = extractelement <4 x float> %38, i32 1 > %41 = extractelement <4 x float> %38, i32 2 > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %14) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = fmul float %25, %39 > %48 = fmul float %26, %40 > %49 = fadd float %47, %48 > %50 = fmul float %27, %41 > %51 = fadd float %49, %50 > %52 = fadd float %51, %28 > %53 = call float @llvm.minnum.f32(float %52, float %35) > %54 = fmul float %29, %39 > %55 = fmul float %30, %40 > %56 = fadd float %54, %55 > %57 = fmul float %31, %41 > %58 = fadd float %56, %57 > %59 = fadd float %58, %32 > %60 = fmul float %17, %39 > %61 = fmul float %18, %40 > %62 = fadd float %60, %61 > %63 = fmul float %19, %41 > %64 = fadd float %62, %63 > %65 = fadd float %64, %20 > %66 = fmul float %21, %39 > %67 = fmul float %22, %40 > %68 = fadd float %66, %67 > %69 = fmul float %23, %41 > %70 = fadd float %68, %69 > %71 = fadd float %70, %24 > %72 = bitcast i32 %11 to float > %73 = insertvalue <{ float, float, float }> undef, float %72, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %52, float %59) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %71, float %53, float %59) > ret <{ float, float, float }> %73 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx 
> 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, 
TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, 
IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx 
>254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, 
TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 214 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x 
<16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, 
i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > 
%251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float 
@llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > 
%432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 
> %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x 
i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, 
align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], 
IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 
>115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, 
TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, 
TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: 
MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 215 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, 
<16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float @llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw 
i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > 
%164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 
> store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 
255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > %399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 
= and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > 
%499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 
= load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 
= zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = 
and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* %848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 
= call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. 
> br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > 
%1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = 
and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 
%1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 
> %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = 
select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label 
%ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > 
store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > 
%1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 
0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = 
call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy 
> 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 216 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 
= shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, 
%181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float 
@llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 
262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, 
%446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = 
shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl 
i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ 
float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..29] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 464, 384, 432} >IMM[2] FLT32 { 0.0000, 158456325028528675187087900672.0000, -2.0000, 3.0000} >IMM[3] UINT32 {416, 400, 448, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[2].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[2].w, CONST[1][29].zzzz > 18: FSNE TEMP[3].x, CONST[1][24].yyyy, IMM[2].xxxx > 19: UIF TEMP[3].xxxx :0 > 20: RCP TEMP[3].x, CONST[1][24].yyyy > 21: ELSE :0 > 22: MOV TEMP[3].x, IMM[2].yyyy > 23: ENDIF > 24: MOV TEMP[4].xy, IN[0].xyyy > 25: TEX TEMP[4].xy, TEMP[4], SAMP[1], 2D > 26: MAX TEMP[5].x, TEMP[4].yyyy, TEMP[4].xxxx > 27: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx > 28: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 29: FMA TEMP[5].x, TEMP[3].xxxx, IMM[2].zzzz, IMM[2].wwww > 30: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[3].xxxx > 31: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx > 32: MUL TEMP[3].x, TEMP[0].xxxx, CONST[1][27].wwww > 33: ADD TEMP[1].xyz, CONST[1][26].xyzz, -CONST[1][27].xyzz > 34: FMA TEMP[5].xyz, TEMP[4].xxxx, TEMP[1].xyzz, CONST[1][27].xyzz > 35: MUL TEMP[1].xyz, TEMP[3].xxxx, TEMP[5].xyzz > 36: FMA TEMP[3].x, -TEMP[0].xxxx, CONST[1][27].wwww, IMM[0].zzzz > 37: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][29].xxxx > 
38: MOV TEMP[5].xy, IN[0].xyyy > 39: TEX TEMP[5], TEMP[5], SAMP[2], 2D > 40: FMA TEMP[1].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[1].xyzz > 41: MOV TEMP[5].w, TEMP[5].wwww > 42: MUL TEMP[6].x, TEMP[4].yyyy, CONST[1][25].wwww > 43: FMA TEMP[4].x, -TEMP[4].yyyy, CONST[1][25].wwww, IMM[0].zzzz > 44: MUL TEMP[3].xyz, TEMP[6].xxxx, CONST[1][25].xyzz > 45: FMA TEMP[5].xyz, TEMP[1].xyzz, TEMP[4].xxxx, TEMP[3].xyzz > 46: MOV TEMP[3].xy, IN[0].xyyy > 47: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D > 48: MUL TEMP[1].x, TEMP[3].zzzz, CONST[1][24].xxxx > 49: MOV TEMP[1].y, TEMP[3].yyyy > 50: MOV TEMP[3].z, TEMP[3].xxxx > 51: ADD TEMP[4].xy, -TEMP[1].xyyy, CONST[1][28].wxxx > 52: FMA TEMP[3].xy, TEMP[0].xxxx, TEMP[4].xyyy, TEMP[1].xyyy > 53: MOV TEMP[3].w, IMM[2].xxxx > 54: MOV OUT[0], TEMP[2] > 55: MOV OUT[1], TEMP[5] > 56: MOV OUT[2], TEMP[3] > 57: END >radeonsi: Compiling shader 217 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 460) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) > %42 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 > %44 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 3 > %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 > %47 = extractelement <8 x i32> %43, i32 7 > %48 = extractelement <4 x i32> %46, i32 0 > %49 = and i32 %48, %47 > %50 = insertelement <4 x i32> %46, i32 %49, i32 0 > %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 > %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] 
addrspace(2)* > %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 7 > %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 > %56 = extractelement <8 x i32> %52, i32 7 > %57 = extractelement <4 x i32> %55, i32 0 > %58 = and i32 %57, %56 > %59 = insertelement <4 x i32> %55, i32 %58, i32 0 > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 11 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %70 = load <8 x i32>, <8 x i32> addrspace(2)* %69, align 32, !tbaa !0 > %71 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %72 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %71, i64 0, i64 15 > %73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0 > %74 = extractelement <8 x i32> %70, i32 7 > %75 = extractelement <4 x i32> %73, i32 0 > %76 = and i32 %75, %74 > %77 = insertelement <4 x i32> %73, i32 %76, i32 0 > %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %89 = bitcast float %78 to i32 > %90 = bitcast float %79 to i32 > %91 = insertelement <2 x i32> undef, i32 %89, i32 0 > %92 = insertelement <2 x i32> %91, i32 %90, i32 1 > %93 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %92, <8 x i32> %43, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %94 = extractelement <4 x float> %93, i32 1 > %95 = extractelement <4 x float> %93, i32 3 > %96 = call float @llvm.fma.f32(float %94, float 2.000000e+00, float -1.000000e+00) > %97 = call float @llvm.fma.f32(float %95, float 2.000000e+00, float -1.000000e+00) > %98 = fsub float -0.000000e+00, %96 > %99 = call float @llvm.fma.f32(float %98, float %96, float 1.000000e+00) > %100 = fsub float -0.000000e+00, %97 > %101 = call float @llvm.fma.f32(float %100, float %97, float %99) > %102 = call float @llvm.sqrt.f32(float %101) > %103 = fmul float %80, %96 > %104 = fmul float %81, %97 > %105 = fadd float %104, %103 > %106 = fmul float %82, %102 > %107 = fadd float %105, %106 > %108 = fmul float %83, %96 > %109 = fmul float %84, %97 > %110 = fadd float %109, %108 > %111 = fmul float %85, %102 > %112 = fadd float %110, %111 > %113 = fmul float %86, %96 > %114 = fmul float %87, %97 > %115 = fadd float %114, %113 > %116 = fmul float 
%88, %102 > %117 = fadd float %115, %116 > %118 = fmul float %107, %107 > %119 = fmul float %112, %112 > %120 = fadd float %119, %118 > %121 = fmul float %117, %117 > %122 = fadd float %120, %121 > %123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122) > %124 = fmul float %123, %107 > %125 = fmul float %123, %112 > %126 = fmul float %123, %117 > %127 = call float @llvm.fma.f32(float %124, float 5.000000e-01, float 5.000000e-01) > %128 = call float @llvm.fma.f32(float %125, float 5.000000e-01, float 5.000000e-01) > %129 = call float @llvm.fma.f32(float %126, float 5.000000e-01, float 5.000000e-01) > %130 = fcmp une float %26, 0.000000e+00 > %131 = fdiv float 1.000000e+00, %26 > %temp12.0 = select i1 %130, float %131, float 0x4600000000000000 > %132 = bitcast float %78 to i32 > %133 = bitcast float %79 to i32 > %134 = insertelement <2 x i32> undef, i32 %132, i32 0 > %135 = insertelement <2 x i32> %134, i32 %133, i32 1 > %136 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %135, <8 x i32> %52, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %137 = extractelement <4 x float> %136, i32 0 > %138 = extractelement <4 x float> %136, i32 1 > %139 = call float @llvm.maxnum.f32(float %138, float %137) > %140 = fmul float %temp12.0, %139 > %141 = call float @llvm.AMDGPU.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) > %142 = call float @llvm.fma.f32(float %141, float -2.000000e+00, float 3.000000e+00) > %143 = fmul float %141, %141 > %144 = fmul float %143, %142 > %145 = fmul float %144, %37 > %146 = fsub float %31, %34 > %147 = fsub float %32, %35 > %148 = fsub float %33, %36 > %149 = call float @llvm.fma.f32(float %137, float %146, float %34) > %150 = call float @llvm.fma.f32(float %137, float %147, float %35) > %151 = call float @llvm.fma.f32(float %137, float %148, float %36) > %152 = fmul float %145, %149 > %153 = fmul float %145, %150 > %154 = fmul float %145, %151 > %155 = fsub float -0.000000e+00, %144 > %156 = call float @llvm.fma.f32(float %155, float %37, float 1.000000e+00) > %157 = fmul float %144, %40 > %158 = bitcast float %78 to i32 > %159 = bitcast float %79 to i32 > %160 = insertelement <2 x i32> undef, i32 %158, i32 0 > %161 = insertelement <2 x i32> %160, i32 %159, i32 1 > %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %163 = extractelement <4 x float> %162, i32 0 > %164 = extractelement <4 x float> %162, i32 1 > %165 = extractelement <4 x float> %162, i32 2 > %166 = extractelement <4 x float> %162, i32 3 > %167 = call float @llvm.fma.f32(float %163, float %156, float %152) > %168 = call float @llvm.fma.f32(float %164, float %156, float %153) > %169 = call float @llvm.fma.f32(float %165, float %156, float %154) > %170 = fmul float %138, %30 > %171 = fsub float -0.000000e+00, %138 > %172 = call float @llvm.fma.f32(float %171, float %30, float 1.000000e+00) > %173 = fmul float %170, %27 > %174 = fmul float %170, %28 > %175 = fmul float %170, %29 > %176 = call float @llvm.fma.f32(float %167, float %172, float %173) > %177 = call float @llvm.fma.f32(float %168, float %172, float %174) > %178 = call float @llvm.fma.f32(float %169, float %172, float %175) > %179 = bitcast float %78 to i32 > %180 = bitcast float %79 to i32 > %181 = insertelement <2 x i32> undef, i32 %179, i32 0 > %182 = insertelement <2 x i32> %181, i32 %180, i32 1 > %183 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %182, <8 x i32> %70, <4 x i32> %77, 
i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %184 = extractelement <4 x float> %183, i32 0 > %185 = extractelement <4 x float> %183, i32 1 > %186 = extractelement <4 x float> %183, i32 2 > %187 = fmul float %186, %25 > %188 = fsub float %39, %187 > %189 = fsub float %38, %185 > %190 = call float @llvm.fma.f32(float %157, float %188, float %187) > %191 = call float @llvm.fma.f32(float %157, float %189, float %185) > %192 = bitcast float %5 to i32 > %193 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %192, 10 > %194 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %127, 11 > %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194, float %128, 12 > %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %129, 13 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float %41, 14 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197, float %176, 15 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %177, 16 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %178, 17 > %201 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200, float %166, 18 > %202 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %201, float %190, 19 > %203 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %202, float %191, 20 > %204 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %203, float %184, 21 > %205 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %204, float 0.000000e+00, 22 > %206 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %205, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %206 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: 
nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, 
IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, 
TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, 
TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: 
UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, 
IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 218 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = 
load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call 
float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = 
fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 
4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = 
fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, 
%45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd 
float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float 
addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV 
TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy 
>146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, 
TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG 
TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] 
>411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 219 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x 
i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float @llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 
%108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 
= add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = 
mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > %399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, 
align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = 
zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 
> %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 
%723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = 
and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* %848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. > br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > 
%986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > %1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd 
float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 
= and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr 
[8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 %1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 
> %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 > %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float 
%1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = 
select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 
> %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* 
%1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > %1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 
0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 
14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, 
IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 220 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 
= getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > 
%226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd 
float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = 
sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, 
%500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > 
%599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float 
%179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > %723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 > %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to 
float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float %850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, 
float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..8], LOCAL >IMM[0] UINT32 {0, 384, 416, 400} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] UINT32 {464, 448, 432, 496} >IMM[5] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} >IMM[6] UINT32 {480, 0, 0, 0} > 0: ADD TEMP[0].x, CONST[1][24].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[1][24].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[3].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[1][24].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[1][24].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 17: FMA TEMP[2].x, TEMP[3].xxxx, TEMP[0].xxxx, IMM[2].wwww > 18: FMA TEMP[0].x, -TEMP[3].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 19: LG2 TEMP[3].x, TEMP[0].xxxx > 20: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][26].xxxx > 21: EX2 TEMP[3].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][25].wwww > 23: FSLT TEMP[2].x, TEMP[2].xxxx, 
IMM[1].wwww > 24: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 25: INEG TEMP[2].x, TEMP[2].xxxx > 26: USNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx > 27: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 28: KILL_IF -TEMP[1].xxxx > 29: FSNE TEMP[1].x, CONST[1][26].zzzz, IMM[1].wwww > 30: UIF TEMP[1].xxxx :0 > 31: RCP TEMP[1].x, CONST[1][26].zzzz > 32: ELSE :0 > 33: MOV TEMP[1].x, IMM[2].xxxx > 34: ENDIF > 35: MOV TEMP[2].xy, IN[0].xyyy > 36: TEX TEMP[2].xy, TEMP[2], SAMP[0], 2D > 37: MAX TEMP[3].x, TEMP[2].yyyy, TEMP[2].xxxx > 38: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx > 39: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 40: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[1].xxxx > 41: FMA TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy, IMM[2].zzzz > 42: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx > 43: MUL TEMP[3].x, TEMP[1].xxxx, CONST[1][29].wwww > 44: ADD TEMP[4].xyz, CONST[1][28].xyzz, -CONST[1][29].xyzz > 45: FMA TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz, CONST[1][29].xyzz > 46: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[4].xyzz > 47: MUL TEMP[4].x, TEMP[2].yyyy, CONST[1][27].wwww > 48: FMA TEMP[2].x, -TEMP[2].yyyy, CONST[1][27].wwww, IMM[1].xxxx > 49: MUL TEMP[4].xyz, TEMP[4].xxxx, CONST[1][27].xyzz > 50: FMA TEMP[5].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[4].xyzz > 51: MUL TEMP[5].xyz, TEMP[5].xyzz, CONST[1][31].yyyy > 52: FMA TEMP[6].xyz, CONST[1][25].xyzz, TEMP[0].xxxx, TEMP[5].xyzz > 53: MOV TEMP[6].w, IMM[1].wwww > 54: MOV TEMP[7].xy, IN[0].xyyy > 55: TEX TEMP[7].yw, TEMP[7], SAMP[1], 2D > 56: FMA TEMP[7].xy, TEMP[7].ywww, IMM[5].xxxx, IMM[1].zzzz > 57: MOV TEMP[5].xy, TEMP[7].xyxx > 58: FMA TEMP[0].x, -TEMP[7].xxxx, TEMP[7].xxxx, IMM[1].xxxx > 59: FMA TEMP[0].x, -TEMP[7].yyyy, TEMP[7].yyyy, TEMP[0].xxxx > 60: SQRT TEMP[7].x, TEMP[0].xxxx > 61: MOV TEMP[5].z, TEMP[7].xxxx > 62: DP3 TEMP[7].x, IN[1].xyzz, TEMP[5].xyzz > 63: DP3 TEMP[8].x, IN[2].xyzz, TEMP[5].xyzz > 64: MOV TEMP[7].y, TEMP[8].xxxx > 65: DP3 TEMP[8].x, IN[3].xyzz, TEMP[5].xyzz > 66: MOV TEMP[7].z, TEMP[8].xxxx > 67: DP3 TEMP[0].x, TEMP[7].xyzz, TEMP[7].xyzz > 68: RSQ TEMP[8].x, TEMP[0].xxxx > 69: MUL TEMP[5].xyz, TEMP[8].xxxx, TEMP[7].xyzz > 70: FMA TEMP[5].xyz, TEMP[5].xyzz, IMM[5].yyyy, IMM[5].yyyy > 71: MOV TEMP[5].w, CONST[1][31].zzzz > 72: FMA TEMP[0].x, -TEMP[1].xxxx, CONST[1][29].wwww, IMM[1].xxxx > 73: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][31].xxxx > 74: MOV TEMP[7].xy, IN[0].xyyy > 75: TEX TEMP[7], TEMP[7], SAMP[2], 2D > 76: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[3].xyzz > 77: MOV TEMP[0].w, TEMP[7].wwww > 78: FMA TEMP[0].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[4].xyzz > 79: MOV TEMP[2].xy, IN[0].xyyy > 80: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D > 81: MUL TEMP[3].x, TEMP[2].zzzz, CONST[1][26].yyyy > 82: MOV TEMP[3].y, TEMP[2].yyyy > 83: MOV TEMP[2].z, TEMP[2].xxxx > 84: ADD TEMP[4].xy, -TEMP[3].xyyy, CONST[1][30].wxxx > 85: FMA TEMP[2].xy, TEMP[1].xxxx, TEMP[4].xyyy, TEMP[3].xyyy > 86: MOV TEMP[2].w, IMM[1].wwww > 87: MOV OUT[0], TEMP[6] > 88: MOV OUT[1], TEMP[5] > 89: MOV OUT[2], TEMP[0] > 90: MOV OUT[3], TEMP[2] > 91: END >radeonsi: Compiling shader 221 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 
inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 452) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 456) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 492) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %50 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 > %52 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %53 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %52, i64 0, i64 3 > %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 > %55 = extractelement <8 x i32> %51, i32 7 > %56 = extractelement <4 x i32> %54, i32 0 > %57 = and i32 %56, %55 > %58 = insertelement <4 x i32> %54, i32 %57, i32 0 > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 7 > %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 > %70 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %71 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %70, i64 0, i64 11 > %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 > %73 = extractelement <8 x i32> %69, i32 7 > %74 = extractelement <4 x i32> %72, i32 0 > %75 = and i32 %74, %73 > %76 = 
insertelement <4 x i32> %72, i32 %75, i32 0 > %77 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %78 = load <8 x i32>, <8 x i32> addrspace(2)* %77, align 32, !tbaa !0 > %79 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %79, i64 0, i64 15 > %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 > %82 = extractelement <8 x i32> %78, i32 7 > %83 = extractelement <4 x i32> %81, i32 0 > %84 = and i32 %83, %82 > %85 = insertelement <4 x i32> %81, i32 %84, i32 0 > %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %98 = fadd float %26, 1.000000e+00 > %99 = fsub float 1.000000e+00, %97 > %100 = fsub float 0x3FEFD70A40000000, %97 > %101 = call float @llvm.fma.f32(float %25, float %98, float %99) > %102 = call float @llvm.ceil.f32(float %100) > %103 = call float @llvm.AMDGPU.clamp.(float %102, float 0.000000e+00, float 1.000000e+00) > %104 = fadd float %101, -1.000000e+00 > %105 = fcmp une float %26, 0.000000e+00 > %106 = fdiv float 1.000000e+00, %26 > %temp8.0 = select i1 %105, float %106, float 0x4600000000000000 > %107 = fmul float %temp8.0, %104 > %108 = call float @llvm.AMDGPU.clamp.(float %107, float 0.000000e+00, float 1.000000e+00) > %109 = call float @llvm.fma.f32(float %108, float -2.000000e+00, float 3.000000e+00) > %110 = fmul float %108, %108 > %111 = fmul float %110, %109 > %112 = call float @llvm.fma.f32(float %103, float %111, float 0xBFD8181820000000) > %113 = fsub float -0.000000e+00, %103 > %114 = call float @llvm.fma.f32(float %113, float %111, float 1.000000e+00) > %115 = call float @llvm.log2.f32(float %114) > %116 = fmul float %115, %31 > %117 = call float @llvm.exp2.f32(float %116) > %118 = fmul float %117, %30 > %119 = fcmp olt float %112, 0.000000e+00 > %120 = select i1 %119, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %120) > %121 = fcmp une float %33, 0.000000e+00 > %122 = fdiv float 1.000000e+00, %33 > %temp4.0 = select i1 %121, float %122, float 0x4600000000000000 > %123 = bitcast float %86 to i32 > %124 = bitcast float %87 to i32 > %125 = insertelement <2 x i32> undef, i32 %123, i32 0 > %126 = insertelement <2 x i32> %125, i32 %124, i32 1 > %127 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %126, <8 x i32> %51, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %128 = extractelement <4 x float> %127, i32 0 > %129 = extractelement <4 x float> %127, i32 1 > %130 = call float @llvm.maxnum.f32(float %129, float %128) > %131 = fmul float %temp4.0, %130 > %132 = call float @llvm.AMDGPU.clamp.(float %131, float 0.000000e+00, float 
1.000000e+00) > %133 = fmul float %132, %132 > %134 = call float @llvm.fma.f32(float %132, float -2.000000e+00, float 3.000000e+00) > %135 = fmul float %133, %134 > %136 = fmul float %135, %44 > %137 = fsub float %38, %41 > %138 = fsub float %39, %42 > %139 = fsub float %40, %43 > %140 = call float @llvm.fma.f32(float %128, float %137, float %41) > %141 = call float @llvm.fma.f32(float %128, float %138, float %42) > %142 = call float @llvm.fma.f32(float %128, float %139, float %43) > %143 = fmul float %136, %140 > %144 = fmul float %136, %141 > %145 = fmul float %136, %142 > %146 = fmul float %129, %37 > %147 = fsub float -0.000000e+00, %129 > %148 = call float @llvm.fma.f32(float %147, float %37, float 1.000000e+00) > %149 = fmul float %146, %34 > %150 = fmul float %146, %35 > %151 = fmul float %146, %36 > %152 = call float @llvm.fma.f32(float %143, float %148, float %149) > %153 = call float @llvm.fma.f32(float %144, float %148, float %150) > %154 = call float @llvm.fma.f32(float %145, float %148, float %151) > %155 = fmul float %152, %48 > %156 = fmul float %153, %48 > %157 = fmul float %154, %48 > %158 = call float @llvm.fma.f32(float %27, float %118, float %155) > %159 = call float @llvm.fma.f32(float %28, float %118, float %156) > %160 = call float @llvm.fma.f32(float %29, float %118, float %157) > %161 = bitcast float %86 to i32 > %162 = bitcast float %87 to i32 > %163 = insertelement <2 x i32> undef, i32 %161, i32 0 > %164 = insertelement <2 x i32> %163, i32 %162, i32 1 > %165 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %164, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %166 = extractelement <4 x float> %165, i32 1 > %167 = extractelement <4 x float> %165, i32 3 > %168 = call float @llvm.fma.f32(float %166, float 2.000000e+00, float -1.000000e+00) > %169 = call float @llvm.fma.f32(float %167, float 2.000000e+00, float -1.000000e+00) > %170 = fsub float -0.000000e+00, %168 > %171 = call float @llvm.fma.f32(float %170, float %168, float 1.000000e+00) > %172 = fsub float -0.000000e+00, %169 > %173 = call float @llvm.fma.f32(float %172, float %169, float %171) > %174 = call float @llvm.sqrt.f32(float %173) > %175 = fmul float %88, %168 > %176 = fmul float %89, %169 > %177 = fadd float %176, %175 > %178 = fmul float %90, %174 > %179 = fadd float %177, %178 > %180 = fmul float %91, %168 > %181 = fmul float %92, %169 > %182 = fadd float %181, %180 > %183 = fmul float %93, %174 > %184 = fadd float %182, %183 > %185 = fmul float %94, %168 > %186 = fmul float %95, %169 > %187 = fadd float %186, %185 > %188 = fmul float %96, %174 > %189 = fadd float %187, %188 > %190 = fmul float %179, %179 > %191 = fmul float %184, %184 > %192 = fadd float %191, %190 > %193 = fmul float %189, %189 > %194 = fadd float %192, %193 > %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) > %196 = fmul float %195, %179 > %197 = fmul float %195, %184 > %198 = fmul float %195, %189 > %199 = call float @llvm.fma.f32(float %196, float 5.000000e-01, float 5.000000e-01) > %200 = call float @llvm.fma.f32(float %197, float 5.000000e-01, float 5.000000e-01) > %201 = call float @llvm.fma.f32(float %198, float 5.000000e-01, float 5.000000e-01) > %202 = fsub float -0.000000e+00, %135 > %203 = call float @llvm.fma.f32(float %202, float %44, float 1.000000e+00) > %204 = fmul float %135, %47 > %205 = bitcast float %86 to i32 > %206 = bitcast float %87 to i32 > %207 = insertelement <2 x i32> undef, i32 %205, i32 0 > %208 = insertelement <2 x i32> %207, i32 %206, i32 1 
> %209 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %208, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %210 = extractelement <4 x float> %209, i32 0 > %211 = extractelement <4 x float> %209, i32 1 > %212 = extractelement <4 x float> %209, i32 2 > %213 = extractelement <4 x float> %209, i32 3 > %214 = call float @llvm.fma.f32(float %210, float %203, float %143) > %215 = call float @llvm.fma.f32(float %211, float %203, float %144) > %216 = call float @llvm.fma.f32(float %212, float %203, float %145) > %217 = call float @llvm.fma.f32(float %214, float %148, float %149) > %218 = call float @llvm.fma.f32(float %215, float %148, float %150) > %219 = call float @llvm.fma.f32(float %216, float %148, float %151) > %220 = bitcast float %86 to i32 > %221 = bitcast float %87 to i32 > %222 = insertelement <2 x i32> undef, i32 %220, i32 0 > %223 = insertelement <2 x i32> %222, i32 %221, i32 1 > %224 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %223, <8 x i32> %78, <4 x i32> %85, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %225 = extractelement <4 x float> %224, i32 0 > %226 = extractelement <4 x float> %224, i32 1 > %227 = extractelement <4 x float> %224, i32 2 > %228 = fmul float %227, %32 > %229 = fsub float %46, %228 > %230 = fsub float %45, %226 > %231 = call float @llvm.fma.f32(float %204, float %229, float %228) > %232 = call float @llvm.fma.f32(float %204, float %230, float %226) > %233 = bitcast float %5 to i32 > %234 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %233, 10 > %235 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %234, float %158, 11 > %236 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %235, float %159, 12 > %237 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %236, float %160, 13 > %238 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %237, float 0.000000e+00, 14 > %239 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %238, float %199, 15 > %240 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %239, float %200, 16 > %241 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %240, float %201, 17 > %242 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %241, float %49, 18 > %243 = insertvalue <{ i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %242, float %217, 19 > %244 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %243, float %218, 20 > %245 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %244, float %219, 21 > %246 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %245, float %213, 22 > %247 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %246, float %231, 23 > %248 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %247, float %232, 24 > %249 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %248, float %225, 25 > %250 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %249, float 0.000000e+00, 26 > %251 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %250, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %251 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL 
IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, 
TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy 
>148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL 
ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV 
TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, 
TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 222 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 
> %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 
%216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = 
shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = 
shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = 
fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 
13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH 
>DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS 
TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: 
MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, 
IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, 
TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 223 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> 
%12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 
addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 
to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, 
align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 
= add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, 
align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt 
float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = 
fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. > br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > 
%792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = 
bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > %898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 
to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 %7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* 
> %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > %1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float 
@llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float %1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd 
float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = 
select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 > %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > 
%temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, 
i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 
1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare 
float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL 
TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 224 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, 
i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* 
%112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) 
> %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 
20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = 
bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call 
float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 
8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > 
%654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind 
readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..12], LOCAL >IMM[0] UINT32 {0, 384, 416, 400} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[4] UINT32 {464, 448, 432, 496} >IMM[5] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} >IMM[6] UINT32 {480, 0, 0, 0} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: ADD TEMP[1].x, CONST[1][24].yyyy, IMM[2].xxxx > 6: MOV TEMP[2].xy, IN[0].xyyy > 7: TEX TEMP[3].w, TEMP[2], SAMP[0], 2D > 8: ADD TEMP[3].xy, -TEMP[3].wwww, IMM[2].xyyy > 9: FMA TEMP[4].x, CONST[1][24].xxxx, TEMP[1].xxxx, TEMP[3].xxxx > 10: CEIL TEMP[3].x, TEMP[3].yyyy > 11: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 12: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].zzzz > 13: FSNE TEMP[4].x, CONST[1][24].yyyy, IMM[2].wwww > 14: UIF TEMP[4].xxxx :0 > 15: RCP TEMP[4].x, CONST[1][24].yyyy > 16: ELSE :0 > 17: MOV TEMP[4].x, IMM[3].xxxx > 18: ENDIF > 19: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx > 20: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 21: FMA TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy, IMM[3].zzzz > 22: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[4].xxxx > 23: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx > 24: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx > 25: FMA TEMP[1].x, -TEMP[3].xxxx, TEMP[1].xxxx, IMM[2].xxxx > 26: LG2 TEMP[3].x, TEMP[1].xxxx > 27: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][26].xxxx > 28: EX2 TEMP[3].x, TEMP[1].xxxx > 29: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][25].wwww > 30: MOV TEMP[3].xy, IN[0].xyyy > 31: TEX TEMP[3], TEMP[3], SAMP[1], 2D > 32: FMA TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww, IMM[3].wwww > 33: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww > 34: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz > 35: INEG TEMP[4].x, TEMP[4].xxxx > 36: USNE TEMP[2].x, TEMP[4].xxxx, IMM[0].xxxx > 37: AND TEMP[4].x, TEMP[2].xxxx, IMM[2].xxxx > 38: KILL_IF -TEMP[4].xxxx > 39: FSNE TEMP[4].x, CONST[1][26].zzzz, IMM[2].wwww > 40: UIF TEMP[4].xxxx :0 > 41: RCP TEMP[4].x, CONST[1][26].zzzz > 42: ELSE :0 > 43: MOV TEMP[4].x, IMM[3].xxxx > 44: ENDIF > 45: MOV TEMP[5].xy, IN[0].xyyy > 46: TEX TEMP[5].xy, TEMP[5], SAMP[2], 2D > 47: MAX TEMP[6].x, TEMP[5].yyyy, TEMP[5].xxxx > 48: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx > 49: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 50: MUL TEMP[6].x, TEMP[4].xxxx, TEMP[4].xxxx > 51: FMA TEMP[4].x, TEMP[4].xxxx, IMM[3].yyyy, IMM[3].zzzz > 52: MUL TEMP[4].x, TEMP[6].xxxx, TEMP[4].xxxx > 53: MUL TEMP[6].x, TEMP[4].xxxx, CONST[1][29].wwww > 54: ADD TEMP[7].xyz, CONST[1][28].xyzz, -CONST[1][29].xyzz > 55: FMA TEMP[8].xyz, TEMP[5].xxxx, 
TEMP[7].xyzz, CONST[1][29].xyzz > 56: MUL TEMP[7].xyz, TEMP[6].xxxx, TEMP[8].xyzz > 57: MUL TEMP[6].x, TEMP[5].yyyy, CONST[1][27].wwww > 58: FMA TEMP[5].x, -TEMP[5].yyyy, CONST[1][27].wwww, IMM[2].xxxx > 59: MUL TEMP[6].xyz, TEMP[6].xxxx, CONST[1][27].xyzz > 60: FMA TEMP[8].xyz, TEMP[7].xyzz, TEMP[5].xxxx, TEMP[6].xyzz > 61: MUL TEMP[8].xyz, TEMP[8].xyzz, CONST[1][31].yyyy > 62: FMA TEMP[9].xyz, CONST[1][25].xyzz, TEMP[1].xxxx, TEMP[8].xyzz > 63: MOV TEMP[9].w, IMM[2].wwww > 64: MOV TEMP[10].xy, IN[0].xyyy > 65: TEX TEMP[10].yw, TEMP[10], SAMP[3], 2D > 66: FMA TEMP[10].xy, TEMP[10].ywww, IMM[5].xxxx, IMM[2].zzzz > 67: MOV TEMP[8].xy, TEMP[10].xyxx > 68: FMA TEMP[1].x, -TEMP[10].xxxx, TEMP[10].xxxx, IMM[2].xxxx > 69: FMA TEMP[1].x, -TEMP[10].yyyy, TEMP[10].yyyy, TEMP[1].xxxx > 70: SQRT TEMP[10].x, TEMP[1].xxxx > 71: MOV TEMP[8].z, TEMP[10].xxxx > 72: DP3 TEMP[10].x, IN[1].xyzz, TEMP[8].xyzz > 73: DP3 TEMP[11].x, IN[2].xyzz, TEMP[8].xyzz > 74: MOV TEMP[10].y, TEMP[11].xxxx > 75: DP3 TEMP[11].x, IN[3].xyzz, TEMP[8].xyzz > 76: MOV TEMP[10].z, TEMP[11].xxxx > 77: DP3 TEMP[1].x, TEMP[10].xyzz, TEMP[10].xyzz > 78: RSQ TEMP[11].x, TEMP[1].xxxx > 79: MUL TEMP[8].xyz, TEMP[11].xxxx, TEMP[10].xyzz > 80: MOV TEMP[10].xyz, -TEMP[8].xyzx > 81: USNE TEMP[11].x, TEMP[0].xxxx, IMM[0].xxxx > 82: UIF TEMP[11].xxxx :0 > 83: MOV TEMP[11].x, TEMP[8].xxxx > 84: ELSE :0 > 85: MOV TEMP[11].x, TEMP[10].xxxx > 86: ENDIF > 87: MOV TEMP[11].x, TEMP[11].xxxx > 88: USNE TEMP[12].x, TEMP[0].xxxx, IMM[0].xxxx > 89: UIF TEMP[12].xxxx :0 > 90: MOV TEMP[12].x, TEMP[8].yyyy > 91: ELSE :0 > 92: MOV TEMP[12].x, TEMP[10].yyyy > 93: ENDIF > 94: MOV TEMP[11].y, TEMP[12].xxxx > 95: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 96: UIF TEMP[0].xxxx :0 > 97: MOV TEMP[0].x, TEMP[8].zzzz > 98: ELSE :0 > 99: MOV TEMP[0].x, TEMP[10].zzzz >100: ENDIF >101: MOV TEMP[11].z, TEMP[0].xxxx >102: FMA TEMP[0].xyz, TEMP[11].xyzz, IMM[5].yyyy, IMM[5].yyyy >103: MOV TEMP[0].w, CONST[1][31].wwww >104: FMA TEMP[1].x, -TEMP[4].xxxx, CONST[1][29].wwww, IMM[2].xxxx >105: MUL TEMP[4].x, TEMP[4].xxxx, CONST[1][31].xxxx >106: FMA TEMP[2].xyz, TEMP[3].xyzz, TEMP[1].xxxx, TEMP[7].xyzz >107: FMA TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx, TEMP[6].xyzz >108: MOV TEMP[1].xy, IN[0].xyyy >109: TEX TEMP[1], TEMP[1], SAMP[4], 2D >110: MOV TEMP[2].w, TEMP[1].wwww >111: MUL TEMP[7].x, TEMP[1].zzzz, CONST[1][26].yyyy >112: MOV TEMP[7].y, TEMP[1].yyyy >113: MOV TEMP[1].z, TEMP[1].xxxx >114: ADD TEMP[3].xy, -TEMP[7].xyyy, CONST[1][30].wxxx >115: FMA TEMP[1].xy, TEMP[4].xxxx, TEMP[3].xyyy, TEMP[7].xyyy >116: MOV TEMP[1].w, CONST[1][31].zzzz >117: MOV OUT[0], TEMP[9] >118: MOV OUT[1], TEMP[0] >119: MOV OUT[2], TEMP[2] >120: MOV OUT[3], TEMP[1] >121: END >radeonsi: Compiling shader 225 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* 
%23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 452) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 456) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 492) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 508) > %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 > %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 3 > %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 > %56 = extractelement <8 x i32> %52, i32 7 > %57 = extractelement <4 x i32> %55, i32 0 > %58 = and i32 %57, %56 > %59 = insertelement <4 x i32> %55, i32 %58, i32 0 > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 7 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %70 = load <8 x i32>, <8 x i32> addrspace(2)* %69, align 32, !tbaa !0 > %71 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %72 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %71, i64 0, i64 11 > %73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0 > %74 = extractelement <8 x i32> %70, i32 7 > %75 = extractelement <4 x i32> %73, i32 0 > %76 = and i32 %75, %74 > %77 = insertelement <4 x i32> %73, i32 %76, i32 0 > %78 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !tbaa !0 > %80 = bitcast [32 x <8 x 
i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %81 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %80, i64 0, i64 15 > %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 > %83 = extractelement <8 x i32> %79, i32 7 > %84 = extractelement <4 x i32> %82, i32 0 > %85 = and i32 %84, %83 > %86 = insertelement <4 x i32> %82, i32 %85, i32 0 > %87 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 > %89 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %90 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %89, i64 0, i64 19 > %91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0 > %92 = extractelement <8 x i32> %88, i32 7 > %93 = extractelement <4 x i32> %91, i32 0 > %94 = and i32 %93, %92 > %95 = insertelement <4 x i32> %91, i32 %94, i32 0 > %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %102 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %103 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %104 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %105 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %106 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %107 = icmp ne i32 %19, 0 > %. 
= select i1 %107, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %108 = fadd float %26, 1.000000e+00 > %109 = bitcast float %96 to i32 > %110 = bitcast float %97 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %52, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 3 > %115 = fsub float 1.000000e+00, %114 > %116 = fsub float 0x3FEFD70A40000000, %114 > %117 = call float @llvm.fma.f32(float %25, float %108, float %115) > %118 = call float @llvm.ceil.f32(float %116) > %119 = call float @llvm.AMDGPU.clamp.(float %118, float 0.000000e+00, float 1.000000e+00) > %120 = fadd float %117, -1.000000e+00 > %121 = fcmp une float %26, 0.000000e+00 > %122 = fdiv float 1.000000e+00, %26 > %temp16.0 = select i1 %121, float %122, float 0x4600000000000000 > %123 = fmul float %temp16.0, %120 > %124 = call float @llvm.AMDGPU.clamp.(float %123, float 0.000000e+00, float 1.000000e+00) > %125 = call float @llvm.fma.f32(float %124, float -2.000000e+00, float 3.000000e+00) > %126 = fmul float %124, %124 > %127 = fmul float %126, %125 > %128 = fmul float %127, %119 > %129 = fsub float -0.000000e+00, %119 > %130 = call float @llvm.fma.f32(float %129, float %127, float 1.000000e+00) > %131 = call float @llvm.log2.f32(float %130) > %132 = fmul float %131, %31 > %133 = call float @llvm.exp2.f32(float %132) > %134 = fmul float %133, %30 > %135 = bitcast float %96 to i32 > %136 = bitcast float %97 to i32 > %137 = insertelement <2 x i32> undef, i32 %135, i32 0 > %138 = insertelement <2 x i32> %137, i32 %136, i32 1 > %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %140 = extractelement <4 x float> %139, i32 0 > %141 = extractelement <4 x float> %139, i32 1 > %142 = extractelement <4 x float> %139, i32 2 > %143 = extractelement <4 x float> %139, i32 3 > %144 = call float @llvm.fma.f32(float %128, float %143, float 0xBFD8181820000000) > %145 = fcmp olt float %144, 0.000000e+00 > %146 = select i1 %145, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %146) > %147 = fcmp une float %33, 0.000000e+00 > %148 = fdiv float 1.000000e+00, %33 > %temp16.1 = select i1 %147, float %148, float 0x4600000000000000 > %149 = bitcast float %96 to i32 > %150 = bitcast float %97 to i32 > %151 = insertelement <2 x i32> undef, i32 %149, i32 0 > %152 = insertelement <2 x i32> %151, i32 %150, i32 1 > %153 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %152, <8 x i32> %70, <4 x i32> %77, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %154 = extractelement <4 x float> %153, i32 0 > %155 = extractelement <4 x float> %153, i32 1 > %156 = call float @llvm.maxnum.f32(float %155, float %154) > %157 = fmul float %temp16.1, %156 > %158 = call float @llvm.AMDGPU.clamp.(float %157, float 0.000000e+00, float 1.000000e+00) > %159 = fmul float %158, %158 > %160 = call float @llvm.fma.f32(float %158, float -2.000000e+00, float 3.000000e+00) > %161 = fmul float %159, %160 > %162 = fmul float %161, %44 > %163 = fsub float %38, %41 > %164 = fsub float %39, %42 > %165 = fsub float %40, %43 > %166 = call float @llvm.fma.f32(float %154, float %163, float %41) > %167 = call float @llvm.fma.f32(float %154, float %164, float %42) > %168 = call float @llvm.fma.f32(float %154, float %165, float %43) > %169 
= fmul float %162, %166 > %170 = fmul float %162, %167 > %171 = fmul float %162, %168 > %172 = fmul float %155, %37 > %173 = fsub float -0.000000e+00, %155 > %174 = call float @llvm.fma.f32(float %173, float %37, float 1.000000e+00) > %175 = fmul float %172, %34 > %176 = fmul float %172, %35 > %177 = fmul float %172, %36 > %178 = call float @llvm.fma.f32(float %169, float %174, float %175) > %179 = call float @llvm.fma.f32(float %170, float %174, float %176) > %180 = call float @llvm.fma.f32(float %171, float %174, float %177) > %181 = fmul float %178, %48 > %182 = fmul float %179, %48 > %183 = fmul float %180, %48 > %184 = call float @llvm.fma.f32(float %27, float %134, float %181) > %185 = call float @llvm.fma.f32(float %28, float %134, float %182) > %186 = call float @llvm.fma.f32(float %29, float %134, float %183) > %187 = bitcast float %96 to i32 > %188 = bitcast float %97 to i32 > %189 = insertelement <2 x i32> undef, i32 %187, i32 0 > %190 = insertelement <2 x i32> %189, i32 %188, i32 1 > %191 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %190, <8 x i32> %79, <4 x i32> %86, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %192 = extractelement <4 x float> %191, i32 1 > %193 = extractelement <4 x float> %191, i32 3 > %194 = call float @llvm.fma.f32(float %192, float 2.000000e+00, float -1.000000e+00) > %195 = call float @llvm.fma.f32(float %193, float 2.000000e+00, float -1.000000e+00) > %196 = fsub float -0.000000e+00, %194 > %197 = call float @llvm.fma.f32(float %196, float %194, float 1.000000e+00) > %198 = fsub float -0.000000e+00, %195 > %199 = call float @llvm.fma.f32(float %198, float %195, float %197) > %200 = call float @llvm.sqrt.f32(float %199) > %201 = fmul float %98, %194 > %202 = fmul float %99, %195 > %203 = fadd float %202, %201 > %204 = fmul float %100, %200 > %205 = fadd float %203, %204 > %206 = fmul float %101, %194 > %207 = fmul float %102, %195 > %208 = fadd float %207, %206 > %209 = fmul float %103, %200 > %210 = fadd float %208, %209 > %211 = fmul float %104, %194 > %212 = fmul float %105, %195 > %213 = fadd float %212, %211 > %214 = fmul float %106, %200 > %215 = fadd float %213, %214 > %216 = fmul float %205, %205 > %217 = fmul float %210, %210 > %218 = fadd float %217, %216 > %219 = fmul float %215, %215 > %220 = fadd float %218, %219 > %221 = call float @llvm.AMDGPU.rsq.clamped.f32(float %220) > %222 = fmul float %221, %205 > %223 = fmul float %221, %210 > %224 = fmul float %221, %215 > %225 = fsub float -0.000000e+00, %222 > %226 = fsub float -0.000000e+00, %223 > %227 = fsub float -0.000000e+00, %224 > %228 = bitcast float %. to i32 > %229 = icmp ne i32 %228, 0 > %.67 = select i1 %229, float %222, float %225 > %230 = bitcast float %. to i32 > %231 = icmp ne i32 %230, 0 > %temp48.0 = select i1 %231, float %223, float %226 > %232 = bitcast float %. 
to i32 > %233 = icmp ne i32 %232, 0 > %.68 = select i1 %233, float %224, float %227 > %234 = call float @llvm.fma.f32(float %.67, float 5.000000e-01, float 5.000000e-01) > %235 = call float @llvm.fma.f32(float %temp48.0, float 5.000000e-01, float 5.000000e-01) > %236 = call float @llvm.fma.f32(float %.68, float 5.000000e-01, float 5.000000e-01) > %237 = fsub float -0.000000e+00, %161 > %238 = call float @llvm.fma.f32(float %237, float %44, float 1.000000e+00) > %239 = fmul float %161, %47 > %240 = call float @llvm.fma.f32(float %140, float %238, float %169) > %241 = call float @llvm.fma.f32(float %141, float %238, float %170) > %242 = call float @llvm.fma.f32(float %142, float %238, float %171) > %243 = call float @llvm.fma.f32(float %240, float %174, float %175) > %244 = call float @llvm.fma.f32(float %241, float %174, float %176) > %245 = call float @llvm.fma.f32(float %242, float %174, float %177) > %246 = bitcast float %96 to i32 > %247 = bitcast float %97 to i32 > %248 = insertelement <2 x i32> undef, i32 %246, i32 0 > %249 = insertelement <2 x i32> %248, i32 %247, i32 1 > %250 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %249, <8 x i32> %88, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %251 = extractelement <4 x float> %250, i32 0 > %252 = extractelement <4 x float> %250, i32 1 > %253 = extractelement <4 x float> %250, i32 2 > %254 = extractelement <4 x float> %250, i32 3 > %255 = fmul float %253, %32 > %256 = fsub float %46, %255 > %257 = fsub float %45, %252 > %258 = call float @llvm.fma.f32(float %239, float %256, float %255) > %259 = call float @llvm.fma.f32(float %239, float %257, float %252) > %260 = bitcast float %5 to i32 > %261 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %260, 10 > %262 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %261, float %184, 11 > %263 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %262, float %185, 12 > %264 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %263, float %186, 13 > %265 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %264, float 0.000000e+00, 14 > %266 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %265, float %234, 15 > %267 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %266, float %235, 16 > %268 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %267, float %236, 17 > %269 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %268, float %50, 18 > %270 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %269, float %243, 19 > %271 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %270, float %244, 20 > %272 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %271, float %245, 21 > %273 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %272, float %254, 22 > %274 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %273, float %258, 23 > %275 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %274, float %259, 24 > %276 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %275, float %251, 25 > %277 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %276, float %49, 26 > %278 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %277, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %278 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > 
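A note on the failure that shows up in the next two shader dumps (shaders 226 and 227): both declare the tessellation scratch area as "@tess_lds = external addrspace(3) global [8320 x i32]", i.e. 8320 dwords = 33,280 bytes of LDS, and the log then reports "LLVM triggered Diagnostic Handler: LDS size exceeds device maximum", "LLVM failed to compile shader" and "radeonsi: can't create a shader". Assuming this SI-class GPU reports a 32 KiB (32,768-byte) per-work-group LDS maximum to the LLVM backend (an assumption, not something stated in this log), the allocation is 512 bytes over the limit, which would explain the diagnostic. The C sketch below is purely illustrative and is not radeonsi code; the limit constant and function names are made up for the example.

/*
 * Illustrative sketch only (not radeonsi code): the size check implied by the
 * "LDS size exceeds device maximum" diagnostic, applied to the
 * "@tess_lds = [8320 x i32]" allocation seen in shaders 226/227 below.
 * ASSUMED_LDS_MAX_BYTES is an assumed per-work-group limit for an SI-class GPU.
 */
#include <stdbool.h>
#include <stdio.h>

#define ASSUMED_LDS_MAX_BYTES (32u * 1024u)   /* assumed 32 KiB LDS limit */

static bool lds_fits(unsigned num_dwords)
{
    unsigned bytes = num_dwords * 4u;         /* each i32 element is 4 bytes */
    return bytes <= ASSUMED_LDS_MAX_BYTES;
}

int main(void)
{
    unsigned tess_lds_dwords = 8320;          /* from "@tess_lds ... [8320 x i32]" */
    unsigned bytes = tess_lds_dwords * 4u;

    printf("tess_lds: %u dwords = %u bytes, limit %u bytes -> %s\n",
           tess_lds_dwords, bytes, ASSUMED_LDS_MAX_BYTES,
           lds_fits(tess_lds_dwords) ? "fits" : "exceeds device maximum");
    return 0;
}

Run as-is this prints "tess_lds: 8320 dwords = 33280 bytes, limit 32768 bytes -> exceeds device maximum", matching the compile failure recorded further down, under the stated 32 KiB assumption.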
>SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[2].xy, IN[2].xyxx > 8: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 9: MOV TEMP[0].w, IN[5].wwww > 10: MUL TEMP[0], TEMP[0], CONST[1][10] > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 12: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 13: MOV TEMP[3].y, TEMP[4].xxxx > 14: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 15: MOV TEMP[3].z, TEMP[4].xxxx > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 17: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 18: MOV TEMP[4].y, TEMP[5].xxxx > 19: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 20: MOV TEMP[4].z, TEMP[5].xxxx > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 22: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 23: MOV TEMP[5].y, TEMP[6].xxxx > 24: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 25: MOV TEMP[5].z, TEMP[6].xxxx > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[0] > 30: MOV OUT[1], TEMP[2] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 226 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 124) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 140) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 156) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = 
load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = extractelement <4 x float> %45, i32 2 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %15) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %16) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %17) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %18) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %21, %40 > %72 = fmul float %22, %41 > %73 = fadd float %71, %72 > %74 = fmul float %23, %42 > %75 = fadd float %73, %74 > %76 = fadd float %75, %24 > %77 = fmul float %25, %40 > %78 = fmul float %26, %41 > %79 = fadd float %77, %78 > %80 = fmul float %27, %42 > %81 = fadd float %79, %80 > %82 = fadd float %81, %28 > %83 = fmul float %29, %40 > %84 = fmul float %30, %41 > %85 = fadd float %83, %84 > %86 = fmul float %31, %42 > %87 = fadd float %85, %86 > %88 = fadd float %87, %32 > %89 = fmul float %70, %67 > %90 = fmul float %70, %68 > %91 = fmul float %70, %69 > %92 = fmul float %89, %33 > %93 = fmul float %90, %34 > %94 = fmul float %91, %35 > %95 = fmul float %70, %36 > %96 = fmul float %21, %55 > %97 = fmul float %22, %56 > %98 = fadd float %97, %96 > %99 = fmul float %23, %57 > %100 = fadd float %98, %99 > %101 = fmul float %21, %61 > %102 = fmul float %22, %62 > %103 = fadd float %102, %101 > %104 = fmul float %23, %63 > %105 = fadd float %103, %104 > %106 = fmul float %21, %46 > %107 = fmul float %22, %47 > %108 = fadd float %107, %106 > %109 = fmul float %23, %48 > %110 = fadd float %108, %109 > %111 = fmul float %25, %55 > %112 = fmul float %26, %56 > %113 = fadd float %112, %111 > %114 = fmul float %27, %57 > %115 = fadd float %113, %114 > %116 = fmul float %25, %61 > %117 = fmul float %26, %62 > %118 = fadd float %117, %116 > %119 = fmul float %27, %63 > %120 = fadd float %118, %119 > %121 = fmul float %25, %46 > %122 = fmul 
float %26, %47 > %123 = fadd float %122, %121 > %124 = fmul float %27, %48 > %125 = fadd float %123, %124 > %126 = fmul float %29, %55 > %127 = fmul float %30, %56 > %128 = fadd float %127, %126 > %129 = fmul float %31, %57 > %130 = fadd float %128, %129 > %131 = fmul float %29, %61 > %132 = fmul float %30, %62 > %133 = fadd float %132, %131 > %134 = fmul float %31, %63 > %135 = fadd float %133, %134 > %136 = fmul float %29, %46 > %137 = fmul float %30, %47 > %138 = fadd float %137, %136 > %139 = fmul float %31, %48 > %140 = fadd float %138, %139 > %141 = lshr i32 %8, 13 > %142 = and i32 %141, 255 > %143 = mul i32 %142, %10 > %144 = add i32 %143, 16 > %145 = sext i32 %144 to i64 > %146 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %145 > %147 = bitcast i32 addrspace(3)* %146 to float addrspace(3)* > store float %76, float addrspace(3)* %147, align 4 > %148 = add i32 %143, 17 > %149 = sext i32 %148 to i64 > %150 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %149 > %151 = bitcast i32 addrspace(3)* %150 to float addrspace(3)* > store float %82, float addrspace(3)* %151, align 4 > %152 = add i32 %143, 18 > %153 = sext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = bitcast i32 addrspace(3)* %154 to float addrspace(3)* > store float %88, float addrspace(3)* %155, align 4 > %156 = add i32 %143, 20 > %bc = bitcast <4 x float> %51 to <4 x i32> > %157 = extractelement <4 x i32> %bc, i32 0 > %158 = sext i32 %156 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %157, i32 addrspace(3)* %159, align 4 > %160 = add i32 %143, 21 > %bc28 = bitcast <4 x float> %51 to <4 x i32> > %161 = extractelement <4 x i32> %bc28, i32 1 > %162 = sext i32 %160 to i64 > %163 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %162 > store i32 %161, i32 addrspace(3)* %163, align 4 > %164 = add i32 %143, 24 > %165 = sext i32 %164 to i64 > %166 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %165 > %167 = bitcast i32 addrspace(3)* %166 to float addrspace(3)* > store float %92, float addrspace(3)* %167, align 4 > %168 = add i32 %143, 25 > %169 = sext i32 %168 to i64 > %170 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %169 > %171 = bitcast i32 addrspace(3)* %170 to float addrspace(3)* > store float %93, float addrspace(3)* %171, align 4 > %172 = add i32 %143, 26 > %173 = sext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > %175 = bitcast i32 addrspace(3)* %174 to float addrspace(3)* > store float %94, float addrspace(3)* %175, align 4 > %176 = add i32 %143, 27 > %177 = sext i32 %176 to i64 > %178 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %177 > %179 = bitcast i32 addrspace(3)* %178 to float addrspace(3)* > store float %95, float addrspace(3)* %179, align 4 > %180 = add i32 %143, 28 > %181 = sext i32 %180 to i64 > %182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %181 > %183 = bitcast i32 addrspace(3)* %182 to float addrspace(3)* > store float %100, float addrspace(3)* %183, align 4 > %184 = add i32 %143, 29 > %185 = sext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > %187 = bitcast i32 addrspace(3)* %186 to float addrspace(3)* > store float %105, float 
addrspace(3)* %187, align 4 > %188 = add i32 %143, 30 > %189 = sext i32 %188 to i64 > %190 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %189 > %191 = bitcast i32 addrspace(3)* %190 to float addrspace(3)* > store float %110, float addrspace(3)* %191, align 4 > %192 = add i32 %143, 32 > %193 = sext i32 %192 to i64 > %194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %193 > %195 = bitcast i32 addrspace(3)* %194 to float addrspace(3)* > store float %115, float addrspace(3)* %195, align 4 > %196 = add i32 %143, 33 > %197 = sext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = bitcast i32 addrspace(3)* %198 to float addrspace(3)* > store float %120, float addrspace(3)* %199, align 4 > %200 = add i32 %143, 34 > %201 = sext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = bitcast i32 addrspace(3)* %202 to float addrspace(3)* > store float %125, float addrspace(3)* %203, align 4 > %204 = add i32 %143, 36 > %205 = sext i32 %204 to i64 > %206 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %205 > %207 = bitcast i32 addrspace(3)* %206 to float addrspace(3)* > store float %130, float addrspace(3)* %207, align 4 > %208 = add i32 %143, 37 > %209 = sext i32 %208 to i64 > %210 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %209 > %211 = bitcast i32 addrspace(3)* %210 to float addrspace(3)* > store float %135, float addrspace(3)* %211, align 4 > %212 = add i32 %143, 38 > %213 = sext i32 %212 to i64 > %214 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %213 > %215 = bitcast i32 addrspace(3)* %214 to float addrspace(3)* > store float %140, float addrspace(3)* %215, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV 
OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, 
TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD 
TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: 
SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: 
MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 227 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call 
float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* 
%108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 
%191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = 
and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x 
i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > 
%444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, 
%536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = 
sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. 
> br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 
addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > 
%898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 
%7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > 
%1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float 
%1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge 
float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 
> %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = 
and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw 
nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 
16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float 
@llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, 
TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 228 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > 
%45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = 
add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv 
float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float 
addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > 
%390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 
%480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 
addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 
1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, 
IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, 
TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, 
TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: 
MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, 
CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 229 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> 
%58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 
= call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float 
@llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> 
%40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, 
%45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float 
%508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, 
align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] 
UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, 
IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, 
TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: 
SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: 
ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 230 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> 
addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw 
nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 
addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x 
i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 
= zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x 
i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 
%519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = 
sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 
3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. > br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float 
addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > %898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > 
%974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 %7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > %1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > 
%1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float %1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float 
%35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 
> %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 > %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = 
call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 
to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, 
%1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt 
float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader 
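[The "LDS size exceeds device maximum" failure above is consistent with the tessellation scratch array this IR keeps indexing: @tess_lds is declared as [8320 x i32], i.e. 8320 dwords. Below is a minimal sketch of the size arithmetic, assuming a 32 KiB per-work-group LDS limit (the limit LLVM applies to SI-class parts); the exact limit of the reporter's GPU is an assumption, not something stated in this log.]

    #include <stdio.h>

    int main(void)
    {
        /* 8320 comes from "[8320 x i32] @tess_lds" in the IR dump above.
         * The 32 KiB maximum is an assumed SI-class per-work-group limit. */
        const unsigned tess_lds_dwords = 8320;
        const unsigned tess_lds_bytes  = tess_lds_dwords * 4;   /* i32 = 4 bytes */
        const unsigned lds_max_bytes   = 32 * 1024;              /* assumed device maximum */

        printf("tess_lds: %u bytes, assumed limit: %u bytes -> %s\n",
               tess_lds_bytes, lds_max_bytes,
               tess_lds_bytes > lds_max_bytes ? "exceeds limit" : "fits");
        return 0;
    }

[With these numbers the array alone needs 33280 bytes, 512 bytes over a 32768-byte limit, which would match the diagnostic and the subsequent "can't create a shader" message.]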
>radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV 
TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 231 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> 
%35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > 
%123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge 
float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 
= and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 
%379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float 
addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, 
float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > 
epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..29] >DCL TEMP[0..8], LOCAL >IMM[0] UINT32 {0, 464, 384, 432} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[3] FLT32 { -1.0000, 0.5000, 158456325028528675187087900672.0000, -2.0000} >IMM[4] FLT32 { 3.0000, 0.0000, 0.0000, 0.0000} >IMM[5] UINT32 {416, 400, 448, 0} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: MOV TEMP[1].xy, IN[0].xyyy > 6: TEX TEMP[2], TEMP[1], SAMP[0], 2D > 7: ADD TEMP[3].x, TEMP[2].wwww, IMM[2].xxxx > 8: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy > 9: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz > 10: INEG TEMP[3].x, TEMP[3].xxxx > 11: USNE TEMP[1].x, TEMP[3].xxxx, IMM[0].xxxx > 12: AND TEMP[3].x, TEMP[1].xxxx, IMM[2].zzzz > 13: KILL_IF -TEMP[3].xxxx > 14: MOV TEMP[3].xy, IN[0].xyyy > 15: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 16: FMA TEMP[4].xy, TEMP[3].ywww, IMM[2].wwww, IMM[3].xxxx > 17: MOV TEMP[3].xy, TEMP[4].xyxx > 18: FMA TEMP[5].x, -TEMP[4].xxxx, TEMP[4].xxxx, IMM[2].zzzz > 19: FMA TEMP[4].x, -TEMP[4].yyyy, TEMP[4].yyyy, TEMP[5].xxxx > 20: SQRT TEMP[4].x, TEMP[4].xxxx > 21: MOV TEMP[3].z, TEMP[4].xxxx > 22: DP3 TEMP[4].x, IN[1].xyzz, TEMP[3].xyzz > 23: DP3 TEMP[5].x, IN[2].xyzz, TEMP[3].xyzz > 24: MOV TEMP[4].y, TEMP[5].xxxx > 25: DP3 TEMP[5].x, IN[3].xyzz, TEMP[3].xyzz > 26: MOV TEMP[4].z, TEMP[5].xxxx > 27: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz > 28: RSQ TEMP[5].x, TEMP[5].xxxx > 29: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[4].xyzz > 30: MOV TEMP[5].xyz, -TEMP[3].xyzx > 31: USNE TEMP[6].x, TEMP[0].xxxx, IMM[0].xxxx > 32: UIF TEMP[6].xxxx :0 > 33: MOV TEMP[6].x, TEMP[3].xxxx > 34: ELSE :0 > 35: MOV TEMP[6].x, TEMP[5].xxxx > 36: ENDIF > 37: MOV TEMP[6].x, TEMP[6].xxxx > 38: USNE TEMP[7].x, TEMP[0].xxxx, IMM[0].xxxx > 39: UIF TEMP[7].xxxx :0 > 40: MOV TEMP[7].x, TEMP[3].yyyy > 41: ELSE :0 > 42: MOV TEMP[7].x, TEMP[5].yyyy > 43: ENDIF > 44: MOV TEMP[6].y, TEMP[7].xxxx > 45: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 46: UIF TEMP[0].xxxx :0 > 47: MOV TEMP[0].x, TEMP[3].zzzz > 48: ELSE :0 > 49: MOV TEMP[0].x, TEMP[5].zzzz > 50: ENDIF > 51: MOV TEMP[6].z, TEMP[0].xxxx > 52: FMA TEMP[0].xyz, TEMP[6].xyzz, IMM[3].yyyy, IMM[3].yyyy > 53: MOV TEMP[0].w, CONST[1][29].wwww > 54: FSNE TEMP[5].x, CONST[1][24].yyyy, IMM[2].yyyy > 55: UIF TEMP[5].xxxx :0 > 56: RCP TEMP[5].x, CONST[1][24].yyyy > 57: ELSE :0 > 58: MOV TEMP[5].x, IMM[3].zzzz > 59: ENDIF > 60: MOV TEMP[6].xy, IN[0].xyyy > 61: TEX TEMP[6].xy, TEMP[6], SAMP[2], 2D > 62: MAX TEMP[7].x, TEMP[6].yyyy, TEMP[6].xxxx > 63: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx > 64: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 65: MUL TEMP[7].x, TEMP[5].xxxx, TEMP[5].xxxx > 66: FMA TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww, IMM[4].xxxx > 67: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx > 68: MUL TEMP[7].x, TEMP[5].xxxx, CONST[1][27].wwww > 69: ADD TEMP[4].xyz, CONST[1][26].xyzz, -CONST[1][27].xyzz > 70: FMA TEMP[8].xyz, TEMP[6].xxxx, TEMP[4].xyzz, CONST[1][27].xyzz > 71: 
MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[8].xyzz > 72: FMA TEMP[4].x, -TEMP[5].xxxx, CONST[1][27].wwww, IMM[2].zzzz > 73: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][29].xxxx > 74: FMA TEMP[1].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[7].xyzz > 75: MUL TEMP[3].x, TEMP[6].yyyy, CONST[1][25].wwww > 76: FMA TEMP[2].x, -TEMP[6].yyyy, CONST[1][25].wwww, IMM[2].zzzz > 77: MUL TEMP[4].xyz, TEMP[3].xxxx, CONST[1][25].xyzz > 78: FMA TEMP[2].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[4].xyzz > 79: MOV TEMP[4].xy, IN[0].xyyy > 80: TEX TEMP[4], TEMP[4], SAMP[3], 2D > 81: MOV TEMP[2].w, TEMP[4].wwww > 82: MUL TEMP[1].x, TEMP[4].zzzz, CONST[1][24].xxxx > 83: MOV TEMP[1].y, TEMP[4].yyyy > 84: MOV TEMP[4].z, TEMP[4].xxxx > 85: ADD TEMP[3].xy, -TEMP[1].xyyy, CONST[1][28].wxxx > 86: FMA TEMP[4].xy, TEMP[5].xxxx, TEMP[3].xyyy, TEMP[1].xyyy > 87: MOV TEMP[4].w, CONST[1][29].zzzz > 88: MOV OUT[0], TEMP[0] > 89: MOV OUT[1], TEMP[2] > 90: MOV OUT[2], TEMP[4] > 91: END >radeonsi: Compiling shader 232 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 448) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 460) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* 
%2, i64 0, i64 2 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 7 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 15 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %90 = icmp ne i32 %19, 0 > %. 
= select i1 %90, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %91 = bitcast float %79 to i32 > %92 = bitcast float %80 to i32 > %93 = insertelement <2 x i32> undef, i32 %91, i32 0 > %94 = insertelement <2 x i32> %93, i32 %92, i32 1 > %95 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %94, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %96 = extractelement <4 x float> %95, i32 0 > %97 = extractelement <4 x float> %95, i32 1 > %98 = extractelement <4 x float> %95, i32 2 > %99 = extractelement <4 x float> %95, i32 3 > %100 = fadd float %99, 0xBFD8181820000000 > %101 = fcmp olt float %100, 0.000000e+00 > %102 = select i1 %101, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %102) > %103 = bitcast float %79 to i32 > %104 = bitcast float %80 to i32 > %105 = insertelement <2 x i32> undef, i32 %103, i32 0 > %106 = insertelement <2 x i32> %105, i32 %104, i32 1 > %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %108 = extractelement <4 x float> %107, i32 1 > %109 = extractelement <4 x float> %107, i32 3 > %110 = call float @llvm.fma.f32(float %108, float 2.000000e+00, float -1.000000e+00) > %111 = call float @llvm.fma.f32(float %109, float 2.000000e+00, float -1.000000e+00) > %112 = fsub float -0.000000e+00, %110 > %113 = call float @llvm.fma.f32(float %112, float %110, float 1.000000e+00) > %114 = fsub float -0.000000e+00, %111 > %115 = call float @llvm.fma.f32(float %114, float %111, float %113) > %116 = call float @llvm.sqrt.f32(float %115) > %117 = fmul float %81, %110 > %118 = fmul float %82, %111 > %119 = fadd float %118, %117 > %120 = fmul float %83, %116 > %121 = fadd float %119, %120 > %122 = fmul float %84, %110 > %123 = fmul float %85, %111 > %124 = fadd float %123, %122 > %125 = fmul float %86, %116 > %126 = fadd float %124, %125 > %127 = fmul float %87, %110 > %128 = fmul float %88, %111 > %129 = fadd float %128, %127 > %130 = fmul float %89, %116 > %131 = fadd float %129, %130 > %132 = fmul float %121, %121 > %133 = fmul float %126, %126 > %134 = fadd float %133, %132 > %135 = fmul float %131, %131 > %136 = fadd float %134, %135 > %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) > %138 = fmul float %137, %121 > %139 = fmul float %137, %126 > %140 = fmul float %137, %131 > %141 = fsub float -0.000000e+00, %138 > %142 = fsub float -0.000000e+00, %139 > %143 = fsub float -0.000000e+00, %140 > %144 = bitcast float %. to i32 > %145 = icmp ne i32 %144, 0 > %temp24.0 = select i1 %145, float %138, float %141 > %146 = bitcast float %. to i32 > %147 = icmp ne i32 %146, 0 > %.48 = select i1 %147, float %139, float %142 > %148 = bitcast float %. 
to i32 > %149 = icmp ne i32 %148, 0 > %temp.1 = select i1 %149, float %140, float %143 > %150 = call float @llvm.fma.f32(float %temp24.0, float 5.000000e-01, float 5.000000e-01) > %151 = call float @llvm.fma.f32(float %.48, float 5.000000e-01, float 5.000000e-01) > %152 = call float @llvm.fma.f32(float %temp.1, float 5.000000e-01, float 5.000000e-01) > %153 = fcmp une float %26, 0.000000e+00 > %154 = fdiv float 1.000000e+00, %26 > %temp20.0 = select i1 %153, float %154, float 0x4600000000000000 > %155 = bitcast float %79 to i32 > %156 = bitcast float %80 to i32 > %157 = insertelement <2 x i32> undef, i32 %155, i32 0 > %158 = insertelement <2 x i32> %157, i32 %156, i32 1 > %159 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %158, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %160 = extractelement <4 x float> %159, i32 0 > %161 = extractelement <4 x float> %159, i32 1 > %162 = call float @llvm.maxnum.f32(float %161, float %160) > %163 = fmul float %temp20.0, %162 > %164 = call float @llvm.AMDGPU.clamp.(float %163, float 0.000000e+00, float 1.000000e+00) > %165 = fmul float %164, %164 > %166 = call float @llvm.fma.f32(float %164, float -2.000000e+00, float 3.000000e+00) > %167 = fmul float %165, %166 > %168 = fmul float %167, %37 > %169 = fsub float %31, %34 > %170 = fsub float %32, %35 > %171 = fsub float %33, %36 > %172 = call float @llvm.fma.f32(float %160, float %169, float %34) > %173 = call float @llvm.fma.f32(float %160, float %170, float %35) > %174 = call float @llvm.fma.f32(float %160, float %171, float %36) > %175 = fmul float %168, %172 > %176 = fmul float %168, %173 > %177 = fmul float %168, %174 > %178 = fsub float -0.000000e+00, %167 > %179 = call float @llvm.fma.f32(float %178, float %37, float 1.000000e+00) > %180 = fmul float %167, %40 > %181 = call float @llvm.fma.f32(float %96, float %179, float %175) > %182 = call float @llvm.fma.f32(float %97, float %179, float %176) > %183 = call float @llvm.fma.f32(float %98, float %179, float %177) > %184 = fmul float %161, %30 > %185 = fsub float -0.000000e+00, %161 > %186 = call float @llvm.fma.f32(float %185, float %30, float 1.000000e+00) > %187 = fmul float %184, %27 > %188 = fmul float %184, %28 > %189 = fmul float %184, %29 > %190 = call float @llvm.fma.f32(float %181, float %186, float %187) > %191 = call float @llvm.fma.f32(float %182, float %186, float %188) > %192 = call float @llvm.fma.f32(float %183, float %186, float %189) > %193 = bitcast float %79 to i32 > %194 = bitcast float %80 to i32 > %195 = insertelement <2 x i32> undef, i32 %193, i32 0 > %196 = insertelement <2 x i32> %195, i32 %194, i32 1 > %197 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %196, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %198 = extractelement <4 x float> %197, i32 0 > %199 = extractelement <4 x float> %197, i32 1 > %200 = extractelement <4 x float> %197, i32 2 > %201 = extractelement <4 x float> %197, i32 3 > %202 = fmul float %200, %25 > %203 = fsub float %39, %202 > %204 = fsub float %38, %199 > %205 = call float @llvm.fma.f32(float %180, float %203, float %202) > %206 = call float @llvm.fma.f32(float %180, float %204, float %199) > %207 = bitcast float %5 to i32 > %208 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %207, 10 > %209 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %208, float %150, 11 > %210 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %209, float %151, 12 > %211 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %210, float %152, 13 > %212 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %211, float %42, 14 > %213 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %212, float %190, 15 > %214 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %213, float %191, 16 > %215 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %214, float %192, 17 > %216 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %215, float %201, 18 > %217 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %216, float %205, 19 > %218 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %217, float %206, 20 > %219 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %218, float %198, 21 > %220 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %219, float %41, 22 > %221 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %220, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %221 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { 
"InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[2].xy, IN[2].xyxx > 8: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 9: MOV TEMP[0].w, IN[5].wwww > 10: MUL TEMP[0], TEMP[0], CONST[1][10] > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 12: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 13: MOV TEMP[3].y, TEMP[4].xxxx > 14: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 15: MOV TEMP[3].z, TEMP[4].xxxx > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 17: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 18: MOV TEMP[4].y, TEMP[5].xxxx > 19: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 20: MOV TEMP[4].z, TEMP[5].xxxx > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 22: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 23: MOV TEMP[5].y, TEMP[6].xxxx > 24: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 25: MOV TEMP[5].z, TEMP[6].xxxx > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[0] > 30: MOV OUT[1], TEMP[2] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 233 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 124) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 140) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 156) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %36 = call float 
@llvm.SI.load.const(<16 x i8> %20, i32 172) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = extractelement <4 x float> %45, i32 2 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %15) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %16) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %17) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %18) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %21, %40 > %72 = fmul float %22, %41 > %73 = fadd float %71, %72 > %74 = fmul float %23, %42 > %75 = fadd float %73, %74 > %76 = fadd float %75, %24 > %77 = fmul float %25, %40 > %78 = fmul float %26, %41 > %79 = fadd float %77, %78 > %80 = fmul float %27, %42 > %81 = fadd float %79, %80 > %82 = fadd float %81, %28 > %83 = fmul float %29, %40 > %84 = fmul float %30, %41 > %85 = fadd float %83, %84 > %86 = fmul float %31, %42 > %87 = fadd float %85, %86 > %88 = fadd float %87, %32 > %89 = fmul float %70, %67 > %90 = fmul float %70, %68 > %91 = fmul float %70, %69 > %92 = fmul float %89, %33 > %93 = fmul float %90, %34 > %94 = fmul float %91, %35 > %95 = fmul float %70, %36 > %96 = fmul float %21, %55 > %97 = fmul float %22, %56 > %98 = fadd float %97, %96 > %99 = fmul float %23, %57 > %100 = fadd float %98, %99 > %101 = fmul float %21, %61 > %102 = fmul float %22, %62 > %103 = fadd float %102, %101 > %104 = fmul float %23, %63 > %105 = fadd float %103, %104 > %106 = fmul float %21, %46 > %107 = fmul float %22, %47 > %108 = fadd float %107, %106 > %109 = fmul float %23, %48 > %110 = fadd float %108, %109 > %111 = fmul float %25, %55 > %112 = fmul float %26, %56 > %113 = fadd float %112, %111 > %114 = fmul float %27, %57 > %115 = fadd float %113, %114 > %116 = fmul float %25, %61 > %117 = fmul float 
%26, %62 > %118 = fadd float %117, %116 > %119 = fmul float %27, %63 > %120 = fadd float %118, %119 > %121 = fmul float %25, %46 > %122 = fmul float %26, %47 > %123 = fadd float %122, %121 > %124 = fmul float %27, %48 > %125 = fadd float %123, %124 > %126 = fmul float %29, %55 > %127 = fmul float %30, %56 > %128 = fadd float %127, %126 > %129 = fmul float %31, %57 > %130 = fadd float %128, %129 > %131 = fmul float %29, %61 > %132 = fmul float %30, %62 > %133 = fadd float %132, %131 > %134 = fmul float %31, %63 > %135 = fadd float %133, %134 > %136 = fmul float %29, %46 > %137 = fmul float %30, %47 > %138 = fadd float %137, %136 > %139 = fmul float %31, %48 > %140 = fadd float %138, %139 > %141 = lshr i32 %8, 13 > %142 = and i32 %141, 255 > %143 = mul i32 %142, %10 > %144 = add i32 %143, 16 > %145 = sext i32 %144 to i64 > %146 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %145 > %147 = bitcast i32 addrspace(3)* %146 to float addrspace(3)* > store float %76, float addrspace(3)* %147, align 4 > %148 = add i32 %143, 17 > %149 = sext i32 %148 to i64 > %150 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %149 > %151 = bitcast i32 addrspace(3)* %150 to float addrspace(3)* > store float %82, float addrspace(3)* %151, align 4 > %152 = add i32 %143, 18 > %153 = sext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = bitcast i32 addrspace(3)* %154 to float addrspace(3)* > store float %88, float addrspace(3)* %155, align 4 > %156 = add i32 %143, 20 > %bc = bitcast <4 x float> %51 to <4 x i32> > %157 = extractelement <4 x i32> %bc, i32 0 > %158 = sext i32 %156 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %157, i32 addrspace(3)* %159, align 4 > %160 = add i32 %143, 21 > %bc28 = bitcast <4 x float> %51 to <4 x i32> > %161 = extractelement <4 x i32> %bc28, i32 1 > %162 = sext i32 %160 to i64 > %163 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %162 > store i32 %161, i32 addrspace(3)* %163, align 4 > %164 = add i32 %143, 24 > %165 = sext i32 %164 to i64 > %166 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %165 > %167 = bitcast i32 addrspace(3)* %166 to float addrspace(3)* > store float %92, float addrspace(3)* %167, align 4 > %168 = add i32 %143, 25 > %169 = sext i32 %168 to i64 > %170 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %169 > %171 = bitcast i32 addrspace(3)* %170 to float addrspace(3)* > store float %93, float addrspace(3)* %171, align 4 > %172 = add i32 %143, 26 > %173 = sext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > %175 = bitcast i32 addrspace(3)* %174 to float addrspace(3)* > store float %94, float addrspace(3)* %175, align 4 > %176 = add i32 %143, 27 > %177 = sext i32 %176 to i64 > %178 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %177 > %179 = bitcast i32 addrspace(3)* %178 to float addrspace(3)* > store float %95, float addrspace(3)* %179, align 4 > %180 = add i32 %143, 28 > %181 = sext i32 %180 to i64 > %182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %181 > %183 = bitcast i32 addrspace(3)* %182 to float addrspace(3)* > store float %100, float addrspace(3)* %183, align 4 > %184 = add i32 %143, 29 > %185 = sext i32 %184 to i64 > %186 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > %187 = bitcast i32 addrspace(3)* %186 to float addrspace(3)* > store float %105, float addrspace(3)* %187, align 4 > %188 = add i32 %143, 30 > %189 = sext i32 %188 to i64 > %190 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %189 > %191 = bitcast i32 addrspace(3)* %190 to float addrspace(3)* > store float %110, float addrspace(3)* %191, align 4 > %192 = add i32 %143, 32 > %193 = sext i32 %192 to i64 > %194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %193 > %195 = bitcast i32 addrspace(3)* %194 to float addrspace(3)* > store float %115, float addrspace(3)* %195, align 4 > %196 = add i32 %143, 33 > %197 = sext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = bitcast i32 addrspace(3)* %198 to float addrspace(3)* > store float %120, float addrspace(3)* %199, align 4 > %200 = add i32 %143, 34 > %201 = sext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = bitcast i32 addrspace(3)* %202 to float addrspace(3)* > store float %125, float addrspace(3)* %203, align 4 > %204 = add i32 %143, 36 > %205 = sext i32 %204 to i64 > %206 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %205 > %207 = bitcast i32 addrspace(3)* %206 to float addrspace(3)* > store float %130, float addrspace(3)* %207, align 4 > %208 = add i32 %143, 37 > %209 = sext i32 %208 to i64 > %210 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %209 > %211 = bitcast i32 addrspace(3)* %210 to float addrspace(3)* > store float %135, float addrspace(3)* %211, align 4 > %212 = add i32 %143, 38 > %213 = sext i32 %212 to i64 > %214 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %213 > %215 = bitcast i32 addrspace(3)* %214 to float addrspace(3)* > store float %140, float addrspace(3)* %215, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 
10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: 
INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV 
TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, 
TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, 
CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 234 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> 
%22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = 
zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* 
%187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 
%45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float 
%29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = 
fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. 
> br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 
addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > 
%898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 
%7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > 
%1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float 
%1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge 
float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 
> %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = 
and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw 
nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 
16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float 
@llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, 
TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 235 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > 
%45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = 
add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv 
float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float 
addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > 
%390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 
%480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 
addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..10] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {112, 128, 144, 0} > 0: MOV 
TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][0], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][1], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][2], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][3], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV TEMP[0].xy, IN[2].xyxx > 10: DP3 TEMP[2].x, CONST[1][7].xyzz, IN[3].xyzz > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[4].xyzz > 12: MOV TEMP[2].y, TEMP[3].xxxx > 13: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[1].xyzz > 14: MOV TEMP[2].z, TEMP[3].xxxx > 15: DP3 TEMP[3].x, CONST[1][8].xyzz, IN[3].xyzz > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[4].xyzz > 17: MOV TEMP[3].y, TEMP[4].xxxx > 18: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[1].xyzz > 19: MOV TEMP[3].z, TEMP[4].xxxx > 20: DP3 TEMP[4].x, CONST[1][9].xyzz, IN[3].xyzz > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[4].xyzz > 22: MOV TEMP[4].y, TEMP[5].xxxx > 23: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[1].xyzz > 24: MOV TEMP[4].z, TEMP[5].xxxx > 25: MOV OUT[4], TEMP[4] > 26: MOV OUT[3], TEMP[3] > 27: MOV OUT[2], TEMP[2] > 28: MOV OUT[1], TEMP[0] > 29: MOV OUT[0], TEMP[1] > 30: END >radeonsi: Compiling shader 236 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 0) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 4) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 8) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 12) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 16) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 20) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 24) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 28) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 32) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 36) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 40) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 44) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 48) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 52) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 56) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 60) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %13) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %14) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %15) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 > %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %16) > %66 = extractelement <4 x float> %65, i32 0 > %67 = extractelement <4 x float> %65, i32 1 > %68 = extractelement <4 x float> %65, i32 2 > %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 > %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %17) > %72 = extractelement <4 x float> %71, i32 0 > %73 = extractelement <4 x float> %71, i32 1 > %74 = extractelement <4 x float> %71, i32 2 > %75 = fmul float %21, %49 > %76 = fmul float %22, %50 > %77 = fadd float %75, %76 > %78 = fmul float %23, %51 > %79 = fadd float %77, %78 > %80 = fadd float %79, %24 > %81 = fmul float %25, %49 > %82 = fmul float %26, %50 > %83 = fadd float %81, %82 > %84 = fmul float %27, %51 > %85 = fadd float %83, %84 > %86 = fadd float %85, %28 > %87 = fmul float %29, %49 > %88 = fmul float %30, %50 > %89 = fadd float %87, %88 > %90 = fmul float %31, %51 > %91 = fadd float %89, %90 > %92 = fadd float %91, %32 > %93 = fmul float %33, %49 > %94 = fmul float %34, %50 > %95 = fadd float %93, %94 > %96 = fmul float %35, %51 > %97 = fadd float %95, %96 > %98 = fadd float %97, %36 > %99 = fmul float %37, %66 > %100 = fmul float %38, %67 > %101 = fadd float %100, %99 > %102 = fmul float %39, %68 > %103 = fadd float %101, %102 > %104 = fmul float %37, %72 > %105 = fmul float %38, %73 > %106 = fadd float %105, %104 > %107 = fmul float %39, %74 > %108 = fadd float %106, %107 > %109 = fmul float %37, %55 > %110 = fmul float %38, %56 > %111 = fadd float %110, %109 > %112 = fmul float %39, %57 > %113 = fadd float %111, %112 > %114 = fmul float %40, %66 > %115 = fmul float %41, %67 > %116 = fadd float %115, %114 > %117 = fmul float %42, %68 > %118 = fadd float %116, %117 > %119 = fmul float %40, %72 > %120 = fmul float %41, %73 > %121 = fadd float %120, %119 > %122 = fmul float %42, %74 > %123 = fadd float %121, %122 > %124 = fmul float %40, %55 > %125 = fmul float %41, %56 > %126 = fadd float %125, %124 > %127 = fmul float %42, %57 > %128 = fadd float %126, %127 > %129 = fmul float %43, %66 > %130 = fmul float %44, %67 > %131 = fadd float %130, %129 > %132 = fmul float %45, %68 > %133 = fadd float %131, %132 > %134 = fmul float %43, %72 > %135 = fmul float %44, %73 > %136 = fadd float %135, %134 > %137 = fmul float %45, %74 > %138 = fadd float %136, %137 > %139 = fmul float %43, %55 > %140 = 
fmul float %44, %56 > %141 = fadd float %140, %139 > %142 = fmul float %45, %57 > %143 = fadd float %141, %142 > %144 = bitcast i32 %11 to float > %145 = insertvalue <{ float, float, float }> undef, float %144, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %61, float %62, float %51, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %108, float %113, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %118, float %123, float %128, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %133, float %138, float %143, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %86, float %92, float %98) > ret <{ float, float, float }> %145 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 352, 384, 368} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: ADD TEMP[0].x, CONST[1][22].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[1][22].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[3].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[1][22].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[1][22].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 17: FMA TEMP[2].x, TEMP[3].xxxx, TEMP[0].xxxx, IMM[2].wwww > 18: FMA TEMP[0].x, -TEMP[3].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 19: LG2 TEMP[3].x, TEMP[0].xxxx > 20: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][24].xxxx > 21: EX2 TEMP[3].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[3].xxxx, CONST[1][23].wwww > 23: MUL TEMP[3].xyz, TEMP[0].xxxx, CONST[1][23].xyzz > 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 25: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 26: INEG TEMP[2].x, TEMP[2].xxxx > 27: USNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx > 28: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 29: KILL_IF -TEMP[1].xxxx > 30: MOV TEMP[3].w, IMM[1].wwww > 31: MOV TEMP[1].xy, IN[0].xyyy > 32: TEX TEMP[1].yw, TEMP[1], SAMP[0], 
2D > 33: FMA TEMP[1].xy, TEMP[1].ywww, IMM[4].xxxx, IMM[1].zzzz > 34: MOV TEMP[0].xy, TEMP[1].xyxx > 35: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[1].xxxx > 36: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 37: SQRT TEMP[1].x, TEMP[1].xxxx > 38: MOV TEMP[0].z, TEMP[1].xxxx > 39: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 40: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 41: MOV TEMP[1].y, TEMP[2].xxxx > 42: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 43: MOV TEMP[1].z, TEMP[2].xxxx > 44: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 45: RSQ TEMP[2].x, TEMP[0].xxxx > 46: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 47: FMA TEMP[1].xyz, TEMP[0].xyzz, IMM[4].yyyy, IMM[4].yyyy > 48: MOV TEMP[1].w, CONST[1][24].zzzz > 49: MOV TEMP[0].xy, IN[0].xyyy > 50: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 51: MOV TEMP[2].xy, IN[0].xyyy > 52: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 53: MUL TEMP[4].x, TEMP[2].zzzz, CONST[1][24].yyyy > 54: MOV TEMP[4].yzw, TEMP[2].xyxw > 55: MOV OUT[0], TEMP[3] > 56: MOV OUT[1], TEMP[1] > 57: MOV OUT[2], TEMP[0] > 58: MOV OUT[3], TEMP[4] > 59: END >radeonsi: Compiling shader 237 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> 
%47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %73 = fadd float %26, 1.000000e+00 > %74 = fsub float 1.000000e+00, %72 > %75 = fsub float 0x3FEFD70A40000000, %72 > %76 = call float @llvm.fma.f32(float %25, float %73, float %74) > %77 = call float @llvm.ceil.f32(float %75) > %78 = call float @llvm.AMDGPU.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) > %79 = fadd float %76, -1.000000e+00 > %80 = fcmp une float %26, 0.000000e+00 > %81 = fdiv float 1.000000e+00, %26 > %temp8.0 = select i1 %80, float %81, float 0x4600000000000000 > %82 = fmul float %temp8.0, %79 > %83 = call float @llvm.AMDGPU.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) > %84 = call float @llvm.fma.f32(float %83, float -2.000000e+00, float 3.000000e+00) > %85 = fmul float %83, %83 > %86 = fmul float %85, %84 > %87 = call float @llvm.fma.f32(float %78, float %86, float 0xBFD8181820000000) > %88 = fsub float -0.000000e+00, %78 > %89 = call float @llvm.fma.f32(float %88, float %86, float 1.000000e+00) > %90 = call float @llvm.log2.f32(float %89) > %91 = fmul float %90, %31 > %92 = call float @llvm.exp2.f32(float %91) > %93 = fmul float %92, %30 > %94 = fmul float %93, %27 > %95 = fmul float %93, %28 > %96 = fmul float %93, %29 > %97 = fcmp olt float %87, 0.000000e+00 > %98 = select i1 %97, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %98) > %99 = bitcast float %61 to i32 > %100 = bitcast float %62 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 1 > %105 = extractelement <4 x float> %103, i32 3 > %106 = call float @llvm.fma.f32(float %104, float 2.000000e+00, float -1.000000e+00) > %107 = call float @llvm.fma.f32(float %105, float 2.000000e+00, float -1.000000e+00) > %108 = fsub float -0.000000e+00, %106 > %109 = call float 
@llvm.fma.f32(float %108, float %106, float 1.000000e+00) > %110 = fsub float -0.000000e+00, %107 > %111 = call float @llvm.fma.f32(float %110, float %107, float %109) > %112 = call float @llvm.sqrt.f32(float %111) > %113 = fmul float %63, %106 > %114 = fmul float %64, %107 > %115 = fadd float %114, %113 > %116 = fmul float %65, %112 > %117 = fadd float %115, %116 > %118 = fmul float %66, %106 > %119 = fmul float %67, %107 > %120 = fadd float %119, %118 > %121 = fmul float %68, %112 > %122 = fadd float %120, %121 > %123 = fmul float %69, %106 > %124 = fmul float %70, %107 > %125 = fadd float %124, %123 > %126 = fmul float %71, %112 > %127 = fadd float %125, %126 > %128 = fmul float %117, %117 > %129 = fmul float %122, %122 > %130 = fadd float %129, %128 > %131 = fmul float %127, %127 > %132 = fadd float %130, %131 > %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) > %134 = fmul float %133, %117 > %135 = fmul float %133, %122 > %136 = fmul float %133, %127 > %137 = call float @llvm.fma.f32(float %134, float 5.000000e-01, float 5.000000e-01) > %138 = call float @llvm.fma.f32(float %135, float 5.000000e-01, float 5.000000e-01) > %139 = call float @llvm.fma.f32(float %136, float 5.000000e-01, float 5.000000e-01) > %140 = bitcast float %61 to i32 > %141 = bitcast float %62 to i32 > %142 = insertelement <2 x i32> undef, i32 %140, i32 0 > %143 = insertelement <2 x i32> %142, i32 %141, i32 1 > %144 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %143, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %145 = extractelement <4 x float> %144, i32 0 > %146 = extractelement <4 x float> %144, i32 1 > %147 = extractelement <4 x float> %144, i32 2 > %148 = extractelement <4 x float> %144, i32 3 > %149 = bitcast float %61 to i32 > %150 = bitcast float %62 to i32 > %151 = insertelement <2 x i32> undef, i32 %149, i32 0 > %152 = insertelement <2 x i32> %151, i32 %150, i32 1 > %153 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %152, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %154 = extractelement <4 x float> %153, i32 0 > %155 = extractelement <4 x float> %153, i32 1 > %156 = extractelement <4 x float> %153, i32 2 > %157 = extractelement <4 x float> %153, i32 3 > %158 = fmul float %156, %32 > %159 = bitcast float %5 to i32 > %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %159, 10 > %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %94, 11 > %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %95, 12 > %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162, float %96, 13 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %163, float 0.000000e+00, 14 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %137, 15 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %138, 16 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %139, 17 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %33, 18 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %145, 19 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %146, 20 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %147, 21 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %148, 22 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %158, 23 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %155, 24 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float %154, 25 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %157, 26 > %177 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %177 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone 
>declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 238 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = 
fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV 
TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][47] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][48] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][49] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][47] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][48] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][49] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][47] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][48] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][49] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, 
TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, 
IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 239 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, 
i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 
> %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 
%7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load 
float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > 
%369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > 
%456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 
= zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 
%602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, 
float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 
> %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][47] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], 
CONST[1][48] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][49] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 240 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and 
i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 
to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float 
%191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, 
i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 352, 0, 0} >IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][22].yyyy > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MOV TEMP[2].w, IMM[2].xxxx > 21: MOV TEMP[3].xy, IN[0].xyyy > 22: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D > 23: MUL TEMP[2].x, TEMP[3].zzzz, CONST[1][22].xxxx > 24: MOV TEMP[2].yz, TEMP[3].xyxx > 25: MOV OUT[0], TEMP[0] > 26: MOV OUT[1], TEMP[1] > 27: MOV OUT[2], TEMP[2] > 28: END >radeonsi: Compiling shader 241 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x 
<4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = bitcast float %54 to i32 > %66 = bitcast float %55 to i32 > %67 = insertelement <2 x i32> undef, i32 %65, i32 0 > %68 = insertelement <2 x i32> %67, i32 %66, i32 1 > %69 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %68, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %70 = extractelement <4 x float> %69, i32 1 > %71 = extractelement <4 x float> %69, i32 3 > %72 = call float @llvm.fma.f32(float %70, float 2.000000e+00, float -1.000000e+00) > %73 = call float @llvm.fma.f32(float 
%71, float 2.000000e+00, float -1.000000e+00) > %74 = fsub float -0.000000e+00, %72 > %75 = call float @llvm.fma.f32(float %74, float %72, float 1.000000e+00) > %76 = fsub float -0.000000e+00, %73 > %77 = call float @llvm.fma.f32(float %76, float %73, float %75) > %78 = call float @llvm.sqrt.f32(float %77) > %79 = fmul float %56, %72 > %80 = fmul float %57, %73 > %81 = fadd float %80, %79 > %82 = fmul float %58, %78 > %83 = fadd float %81, %82 > %84 = fmul float %59, %72 > %85 = fmul float %60, %73 > %86 = fadd float %85, %84 > %87 = fmul float %61, %78 > %88 = fadd float %86, %87 > %89 = fmul float %62, %72 > %90 = fmul float %63, %73 > %91 = fadd float %90, %89 > %92 = fmul float %64, %78 > %93 = fadd float %91, %92 > %94 = fmul float %83, %83 > %95 = fmul float %88, %88 > %96 = fadd float %95, %94 > %97 = fmul float %93, %93 > %98 = fadd float %96, %97 > %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) > %100 = fmul float %99, %83 > %101 = fmul float %99, %88 > %102 = fmul float %99, %93 > %103 = call float @llvm.fma.f32(float %100, float 5.000000e-01, float 5.000000e-01) > %104 = call float @llvm.fma.f32(float %101, float 5.000000e-01, float 5.000000e-01) > %105 = call float @llvm.fma.f32(float %102, float 5.000000e-01, float 5.000000e-01) > %106 = bitcast float %54 to i32 > %107 = bitcast float %55 to i32 > %108 = insertelement <2 x i32> undef, i32 %106, i32 0 > %109 = insertelement <2 x i32> %108, i32 %107, i32 1 > %110 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %109, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %111 = extractelement <4 x float> %110, i32 0 > %112 = extractelement <4 x float> %110, i32 1 > %113 = extractelement <4 x float> %110, i32 2 > %114 = extractelement <4 x float> %110, i32 3 > %115 = bitcast float %54 to i32 > %116 = bitcast float %55 to i32 > %117 = insertelement <2 x i32> undef, i32 %115, i32 0 > %118 = insertelement <2 x i32> %117, i32 %116, i32 1 > %119 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %118, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %120 = extractelement <4 x float> %119, i32 0 > %121 = extractelement <4 x float> %119, i32 1 > %122 = extractelement <4 x float> %119, i32 2 > %123 = fmul float %122, %25 > %124 = bitcast float %5 to i32 > %125 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %124, 10 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %125, float %103, 11 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float %104, 12 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %127, float %105, 13 > %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %26, 14 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %111, 15 > %131 = 
insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %112, 16 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %113, 17 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %114, 18 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %123, 19 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %121, 20 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %120, 21 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float 0.000000e+00, 22 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..3] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[2], CONST[1][2] > 1: DP4 TEMP[1].x, IN[2], CONST[1][3] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: MOV TEMP[0].zw, IMM[1].yyxy > 4: MOV OUT[4], CONST[1][1] > 5: MOV OUT[3], CONST[1][0] > 6: MOV OUT[2], IN[1] > 7: MOV OUT[1], IN[0] > 8: MOV OUT[0], TEMP[0] > 9: END >radeonsi: Compiling shader 242 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 
x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) > %37 = extractelement <4 x float> %36, i32 0 > %38 = extractelement <4 x float> %36, i32 1 > %39 = extractelement <4 x float> %36, i32 2 > %40 = extractelement <4 x float> %36, i32 3 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %14) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %15) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %51, %26 > %56 = fmul float %52, %27 > %57 = fadd float %55, %56 > %58 = fmul float %53, %28 > %59 = fadd float %57, %58 > %60 = fmul float %54, %29 > %61 = fadd float %59, %60 > %62 = fmul float %51, %30 > %63 = fmul float %52, %31 > %64 = fadd float %62, %63 > %65 = fmul float %53, %32 > %66 = fadd float %64, %65 > %67 = fmul float %54, %33 > %68 = fadd float %66, %67 > %69 = bitcast i32 %11 to float > %70 = insertvalue <{ float, float, float }> undef, float %69, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %37, float %38, float %39, float %40) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %18, float %19, float %20, float %21) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %22, float %23, float %24, float %25) > call void 
@llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %68, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %70 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 243 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > 
%48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..48] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 736, 752, 768} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][46] > 
13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][47] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][48] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][46] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][47] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][48] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][46] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][47] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][48] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx 
>104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 
>191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 244 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 736) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 740) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 744) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 748) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 
to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw 
nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and 
i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to 
i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > 
%458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 
x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add 
nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > 
%.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..48] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 736, 752, 768} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][46] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][47] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: 
MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][48] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 245 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 736) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 740) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 744) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 748) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 
262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x 
i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 
1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to 
float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..3] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} > 0: DP4 TEMP[0].x, IN[2], CONST[1][0] > 1: DP4 TEMP[1].x, IN[2], CONST[1][1] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[2], CONST[1][2] > 4: MOV TEMP[0].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, IN[2], CONST[1][3] > 6: MOV TEMP[0].w, TEMP[1].xxxx > 7: MOV OUT[2], IN[1] > 8: MOV OUT[1], IN[0] > 9: MOV OUT[0], TEMP[0] > 10: END >radeonsi: Compiling shader 246 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > 
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) > %37 = extractelement <4 x float> %36, i32 0 > %38 = extractelement <4 x float> %36, i32 1 > %39 = extractelement <4 x float> %36, i32 2 > %40 = extractelement <4 x float> %36, i32 3 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %14) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %15) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %51, %18 > %56 = fmul float %52, %19 > %57 = fadd float %55, %56 > %58 = fmul float %53, %20 > %59 = fadd float %57, %58 > %60 = fmul float %54, %21 > %61 = fadd float %59, %60 > %62 = fmul float %51, %22 > %63 = fmul float %52, %23 > %64 = fadd float %62, %63 > %65 = fmul float %53, %24 > %66 = fadd float %64, %65 > %67 = fmul float %54, %25 > %68 = fadd float %66, %67 > %69 = fmul float %51, %26 > %70 = fmul float %52, %27 > %71 = fadd float %69, %70 > %72 = fmul float %53, %28 > %73 = fadd float %71, %72 > %74 = fmul float %54, %29 > %75 = fadd float %73, %74 > %76 = fmul float %51, %30 > %77 = fmul float %52, %31 > %78 = fadd float %76, %77 > %79 = fmul float %53, %32 > %80 = fadd float %78, %79 > %81 = fmul float %54, %33 > %82 = fadd float %80, %81 > %83 = bitcast i32 %11 to float > %84 = insertvalue <{ float, float, float }> undef, float %83, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %37, float %38, float %39, float %40) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %68, float %75, float %82) > ret <{ float, float, float }> %84 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..1] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 16, 0, 0} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[2], CONST[1][0] > 1: DP4 TEMP[1].x, IN[2], CONST[1][1] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: MOV TEMP[0].zw, IMM[1].yyxy > 4: MOV OUT[2], IN[1] > 5: MOV OUT[1], IN[0] > 6: MOV OUT[0], TEMP[0] > 7: END >radeonsi: Compiling shader 247 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x 
i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %13) > %29 = extractelement <4 x float> %28, i32 0 > %30 = extractelement <4 x float> %28, i32 1 > %31 = extractelement <4 x float> %28, i32 2 > %32 = extractelement <4 x float> %28, i32 3 > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %14) > %36 = extractelement <4 x float> %35, i32 0 > %37 = extractelement <4 x float> %35, i32 1 > %38 = extractelement <4 x float> %35, i32 2 > %39 = extractelement <4 x float> %35, i32 3 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %15) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = extractelement <4 x float> %42, i32 3 > %47 = fmul float %43, %18 > %48 = fmul float %44, %19 > %49 = fadd float %47, %48 > %50 = fmul float %45, %20 > %51 = fadd float %49, %50 > %52 = fmul float %46, %21 > %53 = fadd float %51, %52 > %54 = fmul float %43, %22 > %55 = fmul float %44, %23 > %56 = fadd float %54, %55 > %57 = fmul float %45, %24 > %58 = fadd float %56, %57 > %59 = fmul float %46, %25 > %60 = fadd float %58, %59 > %61 = bitcast i32 %11 to float > %62 = insertvalue <{ float, float, float }> undef, float %61, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %29, float %30, float %31, float %32) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %36, float %37, float %38, float %39) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %53, float %60, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %62 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT 
>PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..3] >DCL CONST[2][0..25] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 32, 1, 400} >IMM[2] UINT32 {48, 16, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][2], TEMP[0] > 3: MIN TEMP[2].x, TEMP[1].xxxx, CONST[2][25].zzzz > 4: MOV TEMP[2].z, TEMP[2].xxxx > 5: DP4 TEMP[1].x, CONST[1][3], TEMP[0] > 6: MOV TEMP[2].w, TEMP[1].xxxx > 7: DP4 TEMP[2].x, CONST[1][0], TEMP[0] > 8: DP4 TEMP[0].x, CONST[1][1], TEMP[0] > 9: MOV TEMP[2].y, TEMP[0].xxxx > 10: MOV OUT[0], TEMP[2] > 11: END >radeonsi: Compiling shader 248 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 408) > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %13) > %38 = extractelement <4 x float> %37, i32 0 > %39 = extractelement <4 x float> %37, i32 1 > %40 = extractelement <4 x float> %37, i32 2 > %41 = fmul float %24, %38 > %42 = fmul float %25, %39 > %43 = fadd float %41, %42 > %44 = fmul float %26, %40 > %45 = fadd float %43, %44 > %46 = fadd float %45, %27 > %47 = call float @llvm.minnum.f32(float %46, float %34) > %48 = fmul float %28, %38 > %49 = fmul float %29, %39 > %50 = fadd float %48, %49 > %51 = fmul float %30, %40 > %52 = fadd float %50, %51 > %53 = fadd float %52, %31 > %54 = fmul float %16, %38 > %55 = fmul float %17, %39 > %56 = fadd float %54, %55 > %57 = fmul float %18, %40 > %58 = fadd float %56, %57 > %59 = fadd float %58, %19 > %60 = fmul float %20, %38 > %61 = fmul float %21, %39 > %62 = fadd float %60, %61 > %63 = fmul float %22, %40 > 
%64 = fadd float %62, %63 > %65 = fadd float %64, %23 > %66 = bitcast i32 %11 to float > %67 = insertvalue <{ float, float, float }> undef, float %66, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %65, float %47, float %53) > ret <{ float, float, float }> %67 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 352, 0, 0} >IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][22].yyyy > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IN[4].xyzz > 21: MOV TEMP[2].w, TEMP[1].wwww > 22: MOV TEMP[1].w, IMM[2].xxxx > 23: MOV TEMP[3].xy, IN[0].xyyy > 24: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D > 25: MUL TEMP[1].x, TEMP[3].zzzz, CONST[1][22].xxxx > 26: MOV TEMP[1].yz, TEMP[3].xyxx > 27: MOV OUT[0], TEMP[0] > 28: MOV OUT[1], TEMP[2] > 29: MOV OUT[2], TEMP[1] > 30: END >radeonsi: Compiling shader 249 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x 
i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %68 = bitcast float %54 to i32 > %69 = bitcast float %55 to i32 > %70 = insertelement <2 x i32> undef, i32 %68, i32 0 > %71 = insertelement <2 x i32> %70, i32 %69, i32 1 > %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %73 = extractelement <4 x float> %72, i32 1 > %74 = extractelement <4 x float> %72, i32 3 > %75 = call float @llvm.fma.f32(float %73, float 2.000000e+00, float -1.000000e+00) > %76 = call float @llvm.fma.f32(float %74, float 2.000000e+00, float 
-1.000000e+00) > %77 = fsub float -0.000000e+00, %75 > %78 = call float @llvm.fma.f32(float %77, float %75, float 1.000000e+00) > %79 = fsub float -0.000000e+00, %76 > %80 = call float @llvm.fma.f32(float %79, float %76, float %78) > %81 = call float @llvm.sqrt.f32(float %80) > %82 = fmul float %56, %75 > %83 = fmul float %57, %76 > %84 = fadd float %83, %82 > %85 = fmul float %58, %81 > %86 = fadd float %84, %85 > %87 = fmul float %59, %75 > %88 = fmul float %60, %76 > %89 = fadd float %88, %87 > %90 = fmul float %61, %81 > %91 = fadd float %89, %90 > %92 = fmul float %62, %75 > %93 = fmul float %63, %76 > %94 = fadd float %93, %92 > %95 = fmul float %64, %81 > %96 = fadd float %94, %95 > %97 = fmul float %86, %86 > %98 = fmul float %91, %91 > %99 = fadd float %98, %97 > %100 = fmul float %96, %96 > %101 = fadd float %99, %100 > %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) > %103 = fmul float %102, %86 > %104 = fmul float %102, %91 > %105 = fmul float %102, %96 > %106 = call float @llvm.fma.f32(float %103, float 5.000000e-01, float 5.000000e-01) > %107 = call float @llvm.fma.f32(float %104, float 5.000000e-01, float 5.000000e-01) > %108 = call float @llvm.fma.f32(float %105, float 5.000000e-01, float 5.000000e-01) > %109 = bitcast float %54 to i32 > %110 = bitcast float %55 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = extractelement <4 x float> %113, i32 1 > %116 = extractelement <4 x float> %113, i32 2 > %117 = extractelement <4 x float> %113, i32 3 > %118 = fmul float %114, %65 > %119 = fmul float %115, %66 > %120 = fmul float %116, %67 > %121 = bitcast float %54 to i32 > %122 = bitcast float %55 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 0 > %127 = extractelement <4 x float> %125, i32 1 > %128 = extractelement <4 x float> %125, i32 2 > %129 = fmul float %128, %25 > %130 = bitcast float %5 to i32 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %130, 10 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %106, 11 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %107, 12 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %108, 13 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %26, 14 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float }> %135, float %118, 15 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %119, 16 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %120, 17 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %117, 18 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %129, 19 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %127, 20 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %126, 21 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float 0.000000e+00, 22 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL IN[8] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, IN[6], TEMP[0] > 3: DP4 TEMP[2].x, IN[7], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, IN[8], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[1].w, IMM[0].xxxx > 8: DP4 TEMP[2].x, CONST[1][0], TEMP[1] > 9: DP4 TEMP[3].x, CONST[1][1], TEMP[1] > 10: MOV TEMP[2].y, TEMP[3].xxxx > 11: DP4 
TEMP[3].x, CONST[1][2], TEMP[1] > 12: MOV TEMP[2].z, TEMP[3].xxxx > 13: DP4 TEMP[1].x, CONST[1][3], TEMP[1] > 14: MOV TEMP[2].w, TEMP[1].xxxx > 15: MOV TEMP[1].xy, IN[2].xyxx > 16: DP3 TEMP[3].x, IN[6].xyzz, IN[3].xyzz > 17: DP3 TEMP[4].x, IN[6].xyzz, IN[4].xyzz > 18: MOV TEMP[3].y, TEMP[4].xxxx > 19: DP3 TEMP[4].x, IN[6].xyzz, IN[1].xyzz > 20: MOV TEMP[3].z, TEMP[4].xxxx > 21: DP3 TEMP[4].x, IN[7].xyzz, IN[3].xyzz > 22: DP3 TEMP[5].x, IN[7].xyzz, IN[4].xyzz > 23: MOV TEMP[4].y, TEMP[5].xxxx > 24: DP3 TEMP[5].x, IN[7].xyzz, IN[1].xyzz > 25: MOV TEMP[4].z, TEMP[5].xxxx > 26: DP3 TEMP[5].x, IN[8].xyzz, IN[3].xyzz > 27: DP3 TEMP[6].x, IN[8].xyzz, IN[4].xyzz > 28: MOV TEMP[5].y, TEMP[6].xxxx > 29: DP3 TEMP[6].x, IN[8].xyzz, IN[1].xyzz > 30: MOV TEMP[5].z, TEMP[6].xxxx > 31: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 32: MOV TEMP[0].w, IN[5].wwww > 33: MUL TEMP[0], TEMP[0], CONST[1][10] > 34: MOV OUT[5], TEMP[0] > 35: MOV OUT[4], TEMP[5] > 36: MOV OUT[3], TEMP[4] > 37: MOV OUT[2], TEMP[3] > 38: MOV OUT[1], TEMP[1] > 39: MOV OUT[0], TEMP[2] > 40: END >radeonsi: Compiling shader 250 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 > %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) > %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) > %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) > %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) > %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) > %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) > %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) > %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) > %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) > %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) > %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) > %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) > %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) > %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) > %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) > %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) > %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) > %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) > %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) > %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) > %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 > %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %13) > %47 = extractelement <4 x float> %46, i32 0 > %48 = extractelement <4 x float> %46, i32 1 > %49 = extractelement <4 x float> %46, i32 2 > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %14) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %15) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 > %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %16) > %64 = extractelement <4 x float> %63, i32 0 > %65 = extractelement <4 x float> %63, i32 1 > %66 = extractelement <4 x float> %63, i32 2 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %17) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %18) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = extractelement <4 x float> %75, i32 3 > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %19) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = extractelement <4 x float> %82, i32 3 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %20) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = extractelement <4 x float> %89, i32 2 > %93 = extractelement <4 x float> %89, i32 3 > %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 8 > %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 > %96 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %21) > %97 = extractelement <4 x float> %96, i32 0 > %98 = extractelement <4 x float> %96, i32 1 > %99 = extractelement <4 x float> %96, i32 2 > %100 = extractelement <4 x float> %96, i32 3 > %101 = fmul float %83, %47 > %102 = fmul float %84, %48 > %103 = fadd float %101, %102 > %104 = fmul float %85, %49 > %105 = fadd float %103, %104 > %106 = fadd float %105, %86 > %107 = fmul float %90, %47 > %108 = fmul float %91, %48 > %109 = fadd float %107, %108 > %110 = fmul float %92, %49 > %111 = fadd float %109, %110 > %112 = fadd float %111, %93 > %113 = fmul float %97, %47 > %114 = fmul float %98, %48 > %115 = fadd float %113, %114 > %116 = fmul float %99, %49 > %117 = fadd float %115, %116 > %118 = fadd float %117, %100 > %119 = fmul float %24, %106 > %120 = fmul 
float %25, %112 > %121 = fadd float %119, %120 > %122 = fmul float %26, %118 > %123 = fadd float %121, %122 > %124 = fadd float %123, %27 > %125 = fmul float %28, %106 > %126 = fmul float %29, %112 > %127 = fadd float %125, %126 > %128 = fmul float %30, %118 > %129 = fadd float %127, %128 > %130 = fadd float %129, %31 > %131 = fmul float %32, %106 > %132 = fmul float %33, %112 > %133 = fadd float %131, %132 > %134 = fmul float %34, %118 > %135 = fadd float %133, %134 > %136 = fadd float %135, %35 > %137 = fmul float %36, %106 > %138 = fmul float %37, %112 > %139 = fadd float %137, %138 > %140 = fmul float %38, %118 > %141 = fadd float %139, %140 > %142 = fadd float %141, %39 > %143 = fmul float %83, %64 > %144 = fmul float %84, %65 > %145 = fadd float %144, %143 > %146 = fmul float %85, %66 > %147 = fadd float %145, %146 > %148 = fmul float %83, %70 > %149 = fmul float %84, %71 > %150 = fadd float %149, %148 > %151 = fmul float %85, %72 > %152 = fadd float %150, %151 > %153 = fmul float %83, %53 > %154 = fmul float %84, %54 > %155 = fadd float %154, %153 > %156 = fmul float %85, %55 > %157 = fadd float %155, %156 > %158 = fmul float %90, %64 > %159 = fmul float %91, %65 > %160 = fadd float %159, %158 > %161 = fmul float %92, %66 > %162 = fadd float %160, %161 > %163 = fmul float %90, %70 > %164 = fmul float %91, %71 > %165 = fadd float %164, %163 > %166 = fmul float %92, %72 > %167 = fadd float %165, %166 > %168 = fmul float %90, %53 > %169 = fmul float %91, %54 > %170 = fadd float %169, %168 > %171 = fmul float %92, %55 > %172 = fadd float %170, %171 > %173 = fmul float %97, %64 > %174 = fmul float %98, %65 > %175 = fadd float %174, %173 > %176 = fmul float %99, %66 > %177 = fadd float %175, %176 > %178 = fmul float %97, %70 > %179 = fmul float %98, %71 > %180 = fadd float %179, %178 > %181 = fmul float %99, %72 > %182 = fadd float %180, %181 > %183 = fmul float %97, %53 > %184 = fmul float %98, %54 > %185 = fadd float %184, %183 > %186 = fmul float %99, %55 > %187 = fadd float %185, %186 > %188 = fmul float %79, %76 > %189 = fmul float %79, %77 > %190 = fmul float %79, %78 > %191 = fmul float %188, %40 > %192 = fmul float %189, %41 > %193 = fmul float %190, %42 > %194 = fmul float %79, %43 > %195 = bitcast i32 %11 to float > %196 = insertvalue <{ float, float, float }> undef, float %195, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %59, float %60, float %118, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %147, float %152, float %157, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %162, float %167, float %172, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %177, float %182, float %187, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %191, float %192, float %193, float %194) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %124, float %130, float %136, float %142) > ret <{ float, float, float }> %196 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT 
>PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {112, 128, 144, 160} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][0], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][1], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][2], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][3], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[2].xy, IN[2].xyxx > 10: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 11: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 12: MOV TEMP[3].y, TEMP[4].xxxx > 13: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 14: MOV TEMP[3].z, TEMP[4].xxxx > 15: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 16: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 17: MOV TEMP[4].y, TEMP[5].xxxx > 18: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 19: MOV TEMP[4].z, TEMP[5].xxxx > 20: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 21: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 22: MOV TEMP[5].y, TEMP[6].xxxx > 23: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 24: MOV TEMP[5].z, TEMP[6].xxxx > 25: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 26: MOV TEMP[0].w, IN[5].wwww > 27: MUL TEMP[0], TEMP[0], CONST[1][10] > 28: MOV OUT[5], TEMP[0] > 29: MOV OUT[4], TEMP[5] > 30: MOV OUT[3], TEMP[4] > 31: MOV OUT[2], TEMP[3] > 32: MOV OUT[1], TEMP[2] > 33: MOV OUT[0], TEMP[1] > 34: END >radeonsi: Compiling shader 251 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 0) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 4) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 8) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 12) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 16) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 20) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 24) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 28) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 32) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 36) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 40) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 44) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 48) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 52) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 56) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 60) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %40 = call float 
@llvm.SI.load.const(<16 x i8> %20, i32 128) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %46 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %47 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %48 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %49 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %13) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %15) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %16) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %17) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %18) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = extractelement <4 x float> %81, i32 3 > %86 = fmul float %21, %53 > %87 = fmul float %22, %54 > %88 = fadd float %86, %87 > %89 = fmul float %23, %55 > %90 = fadd float %88, %89 > %91 = fadd float %90, %24 > %92 = fmul float %25, %53 > %93 = fmul float %26, %54 > %94 = fadd float %92, %93 > %95 = fmul float %27, %55 > %96 = fadd float %94, %95 > %97 = fadd float %96, %28 > %98 = fmul float %29, %53 > %99 = fmul float %30, %54 > %100 = fadd float %98, %99 > %101 = fmul float %31, %55 > %102 = fadd float %100, %101 > %103 = fadd float %102, %32 > %104 = fmul float %33, %53 > %105 = fmul float %34, %54 > %106 = fadd float %104, %105 > %107 = fmul float %35, %55 > %108 = fadd float %106, %107 > %109 = fadd float 
%108, %36 > %110 = fmul float %37, %70 > %111 = fmul float %38, %71 > %112 = fadd float %111, %110 > %113 = fmul float %39, %72 > %114 = fadd float %112, %113 > %115 = fmul float %37, %76 > %116 = fmul float %38, %77 > %117 = fadd float %116, %115 > %118 = fmul float %39, %78 > %119 = fadd float %117, %118 > %120 = fmul float %37, %59 > %121 = fmul float %38, %60 > %122 = fadd float %121, %120 > %123 = fmul float %39, %61 > %124 = fadd float %122, %123 > %125 = fmul float %40, %70 > %126 = fmul float %41, %71 > %127 = fadd float %126, %125 > %128 = fmul float %42, %72 > %129 = fadd float %127, %128 > %130 = fmul float %40, %76 > %131 = fmul float %41, %77 > %132 = fadd float %131, %130 > %133 = fmul float %42, %78 > %134 = fadd float %132, %133 > %135 = fmul float %40, %59 > %136 = fmul float %41, %60 > %137 = fadd float %136, %135 > %138 = fmul float %42, %61 > %139 = fadd float %137, %138 > %140 = fmul float %43, %70 > %141 = fmul float %44, %71 > %142 = fadd float %141, %140 > %143 = fmul float %45, %72 > %144 = fadd float %142, %143 > %145 = fmul float %43, %76 > %146 = fmul float %44, %77 > %147 = fadd float %146, %145 > %148 = fmul float %45, %78 > %149 = fadd float %147, %148 > %150 = fmul float %43, %59 > %151 = fmul float %44, %60 > %152 = fadd float %151, %150 > %153 = fmul float %45, %61 > %154 = fadd float %152, %153 > %155 = fmul float %85, %82 > %156 = fmul float %85, %83 > %157 = fmul float %85, %84 > %158 = fmul float %155, %46 > %159 = fmul float %156, %47 > %160 = fmul float %157, %48 > %161 = fmul float %85, %49 > %162 = bitcast i32 %11 to float > %163 = insertvalue <{ float, float, float }> undef, float %162, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %66, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %114, float %119, float %124, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %134, float %139, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %144, float %149, float %154, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %158, float %159, float %160, float %161) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %91, float %97, float %103, float %109) > ret <{ float, float, float }> %163 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL 
TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, 
IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL 
TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV 
TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, 
TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] 
>406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 252 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x 
i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or 
i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > 
%234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 
%321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, 
i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > 
%520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to 
i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* 
%677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL 
ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, 
TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR 
TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], 
TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, 
TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 253 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' 
>source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float 
@llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = 
zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x 
i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > 
%399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float 
addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 
to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 
1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* 
%848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. 
> br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > 
%1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = 
and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 
%1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 
> %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = 
select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label 
%ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > 
store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > 
%1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 
0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = 
call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV 
TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 254 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 
= bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load 
float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > 
%345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, 
float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 
> %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > 
%723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 > %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = 
lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float %850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) 
> >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 384, 0, 0} >IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx > 3: MUL TEMP[2].xyz, TEMP[0].xyzz, IN[4].xyzz > 4: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 5: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 6: INEG TEMP[1].x, TEMP[1].xxxx > 7: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 8: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 9: KILL_IF -TEMP[1].xxxx > 10: MOV TEMP[1].xy, IN[0].xyyy > 11: TEX TEMP[1].yw, TEMP[1], SAMP[1], 2D > 12: FMA TEMP[1].xy, TEMP[1].ywww, IMM[0].wwww, IMM[3].xxxx > 13: MOV TEMP[0].xy, TEMP[1].xyxx > 14: FMA TEMP[3].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 15: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[3].xxxx > 16: SQRT TEMP[1].x, TEMP[1].xxxx > 17: MOV TEMP[0].z, TEMP[1].xxxx > 18: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 19: DP3 TEMP[3].x, IN[2].xyzz, TEMP[0].xyzz > 20: MOV TEMP[1].y, TEMP[3].xxxx > 21: DP3 TEMP[3].x, IN[3].xyzz, TEMP[0].xyzz > 22: MOV TEMP[1].z, TEMP[3].xxxx > 23: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 24: RSQ TEMP[3].x, TEMP[0].xxxx > 25: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[1].xyzz > 26: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[3].yyyy, IMM[3].yyyy > 27: MOV TEMP[0].w, CONST[1][24].zzzz > 28: MOV TEMP[1].xy, IN[0].xyyy > 29: TEX TEMP[1], TEMP[1], SAMP[2], 2D > 30: MOV TEMP[2].w, TEMP[1].wwww > 31: MUL TEMP[3].x, TEMP[1].zzzz, CONST[1][24].yyyy > 32: MOV TEMP[3].yz, TEMP[1].xyxx > 33: MOV TEMP[3].w, CONST[1][24].xxxx > 34: MOV OUT[0], TEMP[0] > 35: MOV OUT[1], TEMP[2] > 36: MOV OUT[2], TEMP[3] > 37: END >radeonsi: Compiling shader 255 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 11 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %69 = bitcast float %55 to i32 > %70 = bitcast float %56 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = fadd float %77, 0xBFD8181820000000 > %79 = fmul float %74, %66 > %80 = fmul float %75, %67 > %81 = fmul float %76, %68 > %82 = fcmp olt float %78, 0.000000e+00 > %83 = select i1 %82, 
float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %83) > %84 = bitcast float %55 to i32 > %85 = bitcast float %56 to i32 > %86 = insertelement <2 x i32> undef, i32 %84, i32 0 > %87 = insertelement <2 x i32> %86, i32 %85, i32 1 > %88 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %87, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %89 = extractelement <4 x float> %88, i32 1 > %90 = extractelement <4 x float> %88, i32 3 > %91 = call float @llvm.fma.f32(float %89, float 2.000000e+00, float -1.000000e+00) > %92 = call float @llvm.fma.f32(float %90, float 2.000000e+00, float -1.000000e+00) > %93 = fsub float -0.000000e+00, %91 > %94 = call float @llvm.fma.f32(float %93, float %91, float 1.000000e+00) > %95 = fsub float -0.000000e+00, %92 > %96 = call float @llvm.fma.f32(float %95, float %92, float %94) > %97 = call float @llvm.sqrt.f32(float %96) > %98 = fmul float %57, %91 > %99 = fmul float %58, %92 > %100 = fadd float %99, %98 > %101 = fmul float %59, %97 > %102 = fadd float %100, %101 > %103 = fmul float %60, %91 > %104 = fmul float %61, %92 > %105 = fadd float %104, %103 > %106 = fmul float %62, %97 > %107 = fadd float %105, %106 > %108 = fmul float %63, %91 > %109 = fmul float %64, %92 > %110 = fadd float %109, %108 > %111 = fmul float %65, %97 > %112 = fadd float %110, %111 > %113 = fmul float %102, %102 > %114 = fmul float %107, %107 > %115 = fadd float %114, %113 > %116 = fmul float %112, %112 > %117 = fadd float %115, %116 > %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) > %119 = fmul float %118, %102 > %120 = fmul float %118, %107 > %121 = fmul float %118, %112 > %122 = call float @llvm.fma.f32(float %119, float 5.000000e-01, float 5.000000e-01) > %123 = call float @llvm.fma.f32(float %120, float 5.000000e-01, float 5.000000e-01) > %124 = call float @llvm.fma.f32(float %121, float 5.000000e-01, float 5.000000e-01) > %125 = bitcast float %55 to i32 > %126 = bitcast float %56 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 0 > %131 = extractelement <4 x float> %129, i32 1 > %132 = extractelement <4 x float> %129, i32 2 > %133 = extractelement <4 x float> %129, i32 3 > %134 = fmul float %132, %26 > %135 = bitcast float %5 to i32 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %135, 10 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %122, 11 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %123, 12 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %124, 13 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %27, 14 > %141 = insertvalue <{ 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %79, 15 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %80, 16 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %81, 17 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %133, 18 > %145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %134, 19 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %131, 20 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %130, 21 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %25, 22 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..5] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 64, 80} >IMM[1] UINT32 {32, 48, 0, 0} >IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[1], CONST[1][4] > 1: DP4 TEMP[1].x, IN[1], CONST[1][5] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][2] > 4: DP4 TEMP[2].x, IN[1], CONST[1][3] > 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: MOV TEMP[1].zw, IMM[2].yyxy > 7: MOV OUT[4], TEMP[0] > 8: MOV OUT[3], CONST[1][1] > 9: MOV OUT[2], CONST[1][0] > 10: MOV 
OUT[1], IN[0] > 11: MOV OUT[0], TEMP[1] > 12: END >radeonsi: Compiling shader 256 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %14) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %51, %33 > %56 = fmul float %52, %34 > %57 = fadd float %55, %56 > %58 = fmul float %53, %35 > %59 = fadd float %57, %58 > %60 = fmul float %54, %36 > %61 = fadd float %59, %60 > %62 = fmul float %51, %37 > %63 = fmul float %52, %38 > %64 = fadd float %62, %63 > %65 = fmul float %53, %39 > %66 = fadd float %64, %65 > %67 = fmul float %54, %40 > %68 = fadd float %66, %67 > %69 = fmul float %51, %25 > %70 = fmul float %52, %26 > %71 = fadd float %69, %70 > %72 = fmul float %53, %27 > %73 = fadd float 
%71, %72 > %74 = fmul float %54, %28 > %75 = fadd float %73, %74 > %76 = fmul float %51, %29 > %77 = fmul float %52, %30 > %78 = fadd float %76, %77 > %79 = fmul float %53, %31 > %80 = fadd float %78, %79 > %81 = fmul float %54, %32 > %82 = fadd float %80, %81 > %83 = bitcast i32 %11 to float > %84 = insertvalue <{ float, float, float }> undef, float %83, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %61, float %68, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %75, float %82, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %84 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..7] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 96, 112} >IMM[1] UINT32 {32, 48, 64, 80} > 0: DP4 TEMP[0].x, IN[1], CONST[1][6] > 1: DP4 TEMP[1].x, IN[1], CONST[1][7] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][2] > 4: DP4 TEMP[2].x, IN[1], CONST[1][3] > 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: DP4 TEMP[2].x, IN[1], CONST[1][4] > 7: MOV TEMP[1].z, TEMP[2].xxxx > 8: DP4 TEMP[2].x, IN[1], CONST[1][5] > 9: MOV TEMP[1].w, TEMP[2].xxxx > 10: MOV OUT[4], TEMP[0] > 11: MOV OUT[3], CONST[1][1] > 12: MOV OUT[2], CONST[1][0] > 13: MOV OUT[1], IN[0] > 14: MOV OUT[0], TEMP[1] > 15: END >radeonsi: Compiling shader 257 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x 
i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 96) > %42 = call float @llvm.SI.load.const(<16 x i8> %16, i32 100) > %43 = call float @llvm.SI.load.const(<16 x i8> %16, i32 104) > %44 = call float @llvm.SI.load.const(<16 x i8> %16, i32 108) > %45 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %46 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %47 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %48 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %13) > %52 = extractelement <4 x float> %51, i32 0 > %53 = extractelement <4 x float> %51, i32 1 > %54 = extractelement <4 x float> %51, i32 2 > %55 = extractelement <4 x float> %51, i32 3 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = extractelement <4 x float> %58, i32 3 > %63 = fmul float %59, %41 > %64 = fmul float %60, %42 > %65 = fadd float %63, %64 > %66 = fmul float %61, %43 > %67 = fadd float %65, %66 > %68 = fmul float %62, %44 > %69 = fadd float %67, %68 > %70 = fmul float %59, %45 > %71 = fmul float %60, %46 > %72 = fadd float %70, %71 > %73 = fmul float %61, %47 > %74 = fadd float %72, %73 > %75 = fmul float %62, %48 > %76 = fadd float %74, %75 > %77 = fmul float %59, %25 > %78 = fmul float %60, %26 > %79 = fadd float %77, %78 > %80 = fmul float %61, %27 > %81 = fadd float %79, %80 > %82 = fmul float %62, %28 > %83 = fadd float %81, %82 > %84 = fmul float %59, %29 > %85 = fmul float %60, %30 > %86 = fadd float %84, %85 > %87 = fmul float %61, %31 > %88 = fadd float %86, %87 > %89 = fmul float %62, %32 > %90 = fadd float %88, %89 > %91 = fmul float %59, %33 > %92 = fmul float %60, %34 > %93 = fadd float %91, %92 > %94 = fmul float %61, %35 > %95 = fadd float %93, %94 > %96 = fmul float %62, %36 > %97 = fadd float %95, %96 > %98 = fmul float %59, %37 > %99 = fmul float %60, %38 > %100 = fadd float %98, %99 > %101 = fmul float %61, %39 > %102 = fadd float %100, %101 > %103 = fmul float %62, %40 > %104 = fadd float %102, %103 > %105 = bitcast i32 %11 to float > %106 = insertvalue <{ float, float, float }> undef, float %105, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float 
%54, float %55) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %69, float %76, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %83, float %90, float %97, float %104) > ret <{ float, float, float }> %106 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..3] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: DP4 TEMP[1].x, IN[1], CONST[1][2] > 2: DP4 TEMP[2].x, IN[1], CONST[1][3] > 3: MOV TEMP[1].y, TEMP[2].xxxx > 4: MOV TEMP[1].zw, IMM[1].yyxy > 5: MOV OUT[3], TEMP[0] > 6: MOV OUT[2], CONST[1][1] > 7: MOV OUT[1], CONST[1][0] > 8: MOV OUT[0], TEMP[1] > 9: END >radeonsi: Compiling shader 258 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %13) > %36 = extractelement <4 x float> %35, i32 0 > %37 = extractelement <4 x float> %35, i32 1 > %38 = getelementptr [16 x <16 x i8>], 
[16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %14) > %41 = extractelement <4 x float> %40, i32 0 > %42 = extractelement <4 x float> %40, i32 1 > %43 = extractelement <4 x float> %40, i32 2 > %44 = extractelement <4 x float> %40, i32 3 > %45 = fmul float %41, %25 > %46 = fmul float %42, %26 > %47 = fadd float %45, %46 > %48 = fmul float %43, %27 > %49 = fadd float %47, %48 > %50 = fmul float %44, %28 > %51 = fadd float %49, %50 > %52 = fmul float %41, %29 > %53 = fmul float %42, %30 > %54 = fadd float %52, %53 > %55 = fmul float %43, %31 > %56 = fadd float %54, %55 > %57 = fmul float %44, %32 > %58 = fadd float %56, %57 > %59 = bitcast i32 %11 to float > %60 = insertvalue <{ float, float, float }> undef, float %59, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %36, float %37, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %60 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..5] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} >IMM[1] UINT32 {64, 80, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: DP4 TEMP[1].x, IN[1], CONST[1][2] > 2: DP4 TEMP[2].x, IN[1], CONST[1][3] > 3: MOV TEMP[1].y, TEMP[2].xxxx > 4: DP4 TEMP[2].x, IN[1], CONST[1][4] > 5: MOV TEMP[1].z, TEMP[2].xxxx > 6: DP4 TEMP[2].x, IN[1], CONST[1][5] > 7: MOV TEMP[1].w, TEMP[2].xxxx > 8: MOV OUT[3], TEMP[0] > 9: MOV OUT[2], CONST[1][1] > 10: MOV OUT[1], CONST[1][0] > 11: MOV OUT[0], TEMP[1] > 12: END >radeonsi: Compiling shader 259 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %14) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = extractelement <4 x float> %48, i32 3 > %53 = fmul float %49, %25 > %54 = fmul float %50, %26 > %55 = fadd float %53, %54 > %56 = fmul float %51, %27 > %57 = fadd float %55, %56 > %58 = fmul float %52, %28 > %59 = fadd float %57, %58 > %60 = fmul float %49, %29 > %61 = fmul float %50, %30 > %62 = fadd float %60, %61 > %63 = fmul float %51, %31 > %64 = fadd float %62, %63 > %65 = fmul float %52, %32 > %66 = fadd float %64, %65 > %67 = fmul float %49, %33 > %68 = fmul float %50, %34 > %69 = fadd float %67, %68 > %70 = fmul float %51, %35 > %71 = fadd float %69, %70 > %72 = fmul float %52, %36 > %73 = fadd float %71, %72 > %74 = fmul float %49, %37 > %75 = fmul float %50, %38 > %76 = fadd float %74, %75 > %77 = fmul float %51, %39 > %78 = fadd float %76, %77 > %79 = fmul float %52, %40 > %80 = fadd float %78, %79 > %81 = bitcast i32 %11 to float > %82 = insertvalue <{ float, float, float }> undef, float %81, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %44, float %45, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) > ret <{ float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = 
!{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..3] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 32, 48, 16} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: DP4 TEMP[0].x, IN[1], CONST[1][2] > 1: DP4 TEMP[1].x, IN[1], CONST[1][3] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][0] > 4: DP4 TEMP[2].x, IN[1], CONST[1][1] > 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: MOV TEMP[1].zw, IMM[1].yyxy > 7: MOV OUT[2], TEMP[0] > 8: MOV OUT[1], IN[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 260 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %13) > %36 = extractelement <4 x float> %35, i32 0 > %37 = extractelement <4 x float> %35, i32 1 > %38 = extractelement <4 x float> %35, i32 2 > %39 = extractelement <4 x float> %35, i32 3 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %14) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = extractelement <4 x float> %42, i32 3 > %47 = fmul float %43, %25 > %48 = fmul float %44, %26 > %49 = fadd float %47, %48 > %50 = fmul float %45, %27 > %51 = fadd float %49, %50 > %52 = fmul float %46, %28 > %53 = fadd float %51, %52 > %54 = fmul float %43, %29 > %55 = fmul float %44, %30 > %56 = fadd float %54, %55 > %57 = fmul float %45, %31 > %58 = fadd float %56, 
%57 > %59 = fmul float %46, %32 > %60 = fadd float %58, %59 > %61 = fmul float %43, %17 > %62 = fmul float %44, %18 > %63 = fadd float %61, %62 > %64 = fmul float %45, %19 > %65 = fadd float %63, %64 > %66 = fmul float %46, %20 > %67 = fadd float %65, %66 > %68 = fmul float %43, %21 > %69 = fmul float %44, %22 > %70 = fadd float %68, %69 > %71 = fmul float %45, %23 > %72 = fadd float %70, %71 > %73 = fmul float %46, %24 > %74 = fadd float %72, %73 > %75 = bitcast i32 %11 to float > %76 = insertvalue <{ float, float, float }> undef, float %75, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %36, float %37, float %38, float %39) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %60, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %76 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..5] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 64, 80, 16} >IMM[1] UINT32 {32, 48, 0, 0} > 0: DP4 TEMP[0].x, IN[1], CONST[1][4] > 1: DP4 TEMP[1].x, IN[1], CONST[1][5] > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: DP4 TEMP[1].x, IN[1], CONST[1][0] > 4: DP4 TEMP[2].x, IN[1], CONST[1][1] > 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: DP4 TEMP[2].x, IN[1], CONST[1][2] > 7: MOV TEMP[1].z, TEMP[2].xxxx > 8: DP4 TEMP[2].x, IN[1], CONST[1][3] > 9: MOV TEMP[1].w, TEMP[2].xxxx > 10: MOV OUT[2], TEMP[0] > 11: MOV OUT[1], IN[0] > 12: MOV OUT[0], TEMP[1] > 13: END >radeonsi: Compiling shader 261 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 
48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 64) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 68) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 72) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 76) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 80) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 84) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 88) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 92) > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %14) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %51, %33 > %56 = fmul float %52, %34 > %57 = fadd float %55, %56 > %58 = fmul float %53, %35 > %59 = fadd float %57, %58 > %60 = fmul float %54, %36 > %61 = fadd float %59, %60 > %62 = fmul float %51, %37 > %63 = fmul float %52, %38 > %64 = fadd float %62, %63 > %65 = fmul float %53, %39 > %66 = fadd float %64, %65 > %67 = fmul float %54, %40 > %68 = fadd float %66, %67 > %69 = fmul float %51, %17 > %70 = fmul float %52, %18 > %71 = fadd float %69, %70 > %72 = fmul float %53, %19 > %73 = fadd float %71, %72 > %74 = fmul float %54, %20 > %75 = fadd float %73, %74 > %76 = fmul float %51, %21 > %77 = fmul float %52, %22 > %78 = fadd float %76, %77 > %79 = fmul float %53, %23 > %80 = fadd float %78, %79 > %81 = fmul float %54, %24 > %82 = fadd float %80, %81 > %83 = fmul float %51, %25 > %84 = fmul float %52, %26 > %85 = fadd float %83, %84 > %86 = fmul float %53, %27 > %87 = fadd float %85, %86 > %88 = fmul float %54, %28 > %89 = fadd float %87, %88 > %90 = fmul float %51, %29 > %91 = fmul float %52, %30 > %92 = fadd float %90, %91 > %93 = fmul float %53, %31 > %94 = fadd float %92, %93 > %95 = fmul float %54, %32 > %96 = fadd float %94, %95 > %97 = bitcast i32 %11 to float > %98 = insertvalue <{ float, float, float }> undef, float %97, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %61, float %68, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %75, float %82, float %89, float %96) > ret <{ float, float, float }> %98 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > 
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..1] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 0, 0} >IMM[1] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: DP4 TEMP[1].x, IN[1], CONST[1][0] > 2: DP4 TEMP[2].x, IN[1], CONST[1][1] > 3: MOV TEMP[1].y, TEMP[2].xxxx > 4: MOV TEMP[1].zw, IMM[1].yyxy > 5: MOV OUT[1], TEMP[0] > 6: MOV OUT[0], TEMP[1] > 7: END >radeonsi: Compiling shader 262 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %13) > %28 = extractelement <4 x float> %27, i32 0 > %29 = extractelement <4 x float> %27, i32 1 > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %14) > %33 = extractelement <4 x float> %32, i32 0 > %34 = extractelement <4 x float> %32, i32 1 > %35 = extractelement <4 x float> %32, i32 2 > %36 = extractelement <4 x float> %32, i32 3 > %37 = fmul float %33, %17 > %38 = fmul float %34, %18 > %39 = fadd float %37, %38 > %40 = fmul float %35, %19 > %41 = fadd float %39, %40 > %42 = fmul float %36, %20 > %43 = fadd float %41, %42 > %44 = fmul float %33, %21 > %45 = fmul float %34, %22 > %46 = fadd float %44, %45 > %47 = fmul float %35, %23 > %48 = fadd float %46, %47 > %49 = fmul float %36, %24 > %50 = fadd float %48, %49 > %51 = bitcast i32 %11 to float > %52 = insertvalue <{ float, float, float }> undef, float %51, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %28, float %29, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float 0.000000e+00, float 1.000000e+00) > ret <{ float, float, float }> %52 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind 
readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..3] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 16, 32, 48} > 0: MOV TEMP[0].xy, IN[0].xyxx > 1: DP4 TEMP[1].x, IN[1], CONST[1][0] > 2: DP4 TEMP[2].x, IN[1], CONST[1][1] > 3: MOV TEMP[1].y, TEMP[2].xxxx > 4: DP4 TEMP[2].x, IN[1], CONST[1][2] > 5: MOV TEMP[1].z, TEMP[2].xxxx > 6: DP4 TEMP[2].x, IN[1], CONST[1][3] > 7: MOV TEMP[1].w, TEMP[2].xxxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 263 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 24) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) > %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 > %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %13) > %36 = extractelement <4 x float> %35, i32 0 > %37 = extractelement <4 x float> %35, i32 1 > %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %14) > %41 = extractelement <4 x float> %40, i32 0 > %42 = extractelement <4 x float> %40, i32 1 > %43 = extractelement <4 x float> %40, i32 2 > %44 = extractelement <4 x float> %40, i32 3 > %45 = fmul float %41, %17 > %46 = fmul float %42, %18 > %47 = fadd float %45, %46 > %48 = fmul float %43, %19 > %49 = fadd float %47, %48 > %50 = fmul float %44, %20 > %51 = fadd float %49, %50 > %52 = fmul float %41, %21 > %53 = fmul float %42, %22 > %54 = fadd float %52, %53 > %55 = fmul float %43, %23 > %56 = fadd float %54, %55 > %57 = fmul float %44, %24 > %58 = fadd float %56, %57 > %59 = fmul float %41, %25 > %60 = fmul float %42, %26 > %61 = fadd float 
%59, %60 > %62 = fmul float %43, %27 > %63 = fadd float %61, %62 > %64 = fmul float %44, %28 > %65 = fadd float %63, %64 > %66 = fmul float %41, %29 > %67 = fmul float %42, %30 > %68 = fadd float %66, %67 > %69 = fmul float %43, %31 > %70 = fadd float %68, %69 > %71 = fmul float %44, %32 > %72 = fadd float %70, %71 > %73 = bitcast i32 %11 to float > %74 = insertvalue <{ float, float, float }> undef, float %73, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %36, float %37, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float %65, float %72) > ret <{ float, float, float }> %74 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 2: MOV TEMP[1].xy, IN[0].xyyy > 3: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 4: ADD TEMP[2].x, -TEMP[1].wwww, IMM[0].xxxx > 5: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx, TEMP[1].xyzz > 6: FMA TEMP[1].xyz, TEMP[0].xyzz, CONST[1][0].yyyy, CONST[1][0].zzzz > 7: MOV_SAT TEMP[0].xyz, TEMP[1].xyzz > 8: LG2 TEMP[1].x, TEMP[0].xxxx > 9: LG2 TEMP[1].y, TEMP[0].yyyy > 10: LG2 TEMP[1].z, TEMP[0].zzzz > 11: MUL TEMP[0].xyz, TEMP[1].xyzz, CONST[1][0].xxxx > 12: EX2 TEMP[1].x, TEMP[0].xxxx > 13: EX2 TEMP[1].y, TEMP[0].yyyy > 14: EX2 TEMP[1].z, TEMP[0].zzzz > 15: MOV TEMP[0].xyz, TEMP[1].xyzx > 16: MOV TEMP[0].w, IMM[0].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 264 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to 
[0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %48 = bitcast float %46 to i32 > %49 = bitcast float %47 to i32 > %50 = insertelement <2 x i32> undef, i32 %48, i32 0 > %51 = insertelement <2 x i32> %50, i32 %49, i32 1 > %52 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %51, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = bitcast float %46 to i32 > %57 = bitcast float %47 to i32 > %58 = insertelement <2 x i32> undef, i32 %56, i32 0 > %59 = insertelement <2 x i32> %58, i32 %57, i32 1 > %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = fsub float 1.000000e+00, %64 > %66 = call float @llvm.fma.f32(float %53, float %65, float %61) > %67 = call float @llvm.fma.f32(float %54, float %65, float %62) > %68 = call float @llvm.fma.f32(float %55, float %65, float %63) > %69 = call float @llvm.fma.f32(float %66, float %26, float %27) > %70 = call float @llvm.fma.f32(float %67, float %26, float %27) > %71 = call float @llvm.fma.f32(float %68, float %26, float %27) > %72 = call float @llvm.AMDGPU.clamp.(float %69, float 0.000000e+00, float 1.000000e+00) > %73 = call float @llvm.AMDGPU.clamp.(float %70, float 0.000000e+00, float 1.000000e+00) > %74 = call float @llvm.AMDGPU.clamp.(float %71, float 0.000000e+00, float 1.000000e+00) > %75 = call float @llvm.log2.f32(float %72) > %76 = call float @llvm.log2.f32(float %73) > %77 = call float @llvm.log2.f32(float %74) > %78 = fmul float %75, %25 > %79 = fmul float %76, %25 > %80 = fmul float %77, %25 > %81 = call float @llvm.exp2.f32(float %78) > %82 = call float @llvm.exp2.f32(float %79) > %83 = call float @llvm.exp2.f32(float %80) > %84 = bitcast float %5 to i32 > %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %84, 10 > %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %81, 11 > %87 = insertvalue <{ 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86, float %82, 12 > %88 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87, float %83, 13 > %89 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %88, float 1.000000e+00, 14 > %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %89, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION > 0: MOV OUT[0], IN[0] > 1: END >radeonsi: Compiling shader 265 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %13) > %17 = extractelement <4 x float> %16, i32 0 > %18 = extractelement <4 x float> %16, i32 1 > %19 = extractelement <4 x float> %16, i32 2 > %20 = extractelement <4 x float> %16, i32 3 > %21 = bitcast i32 %11 to float > %22 = insertvalue <{ float, float, float }> undef, float %21, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %17, float %18, float %19, float %20) > ret <{ float, float, float }> %22 >} > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > 
epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL SV[0], POSITION >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1] >DCL TEMP[0] >DCL TEMP[1], LOCAL >IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} >IMM[1] INT32 {0, 0, 0, 0} > 0: MOV TEMP[0], SV[0] > 1: MAD TEMP[0].y, SV[0], CONST[1].xxxx, CONST[1].yyyy > 2: F2U TEMP[1].xy, TEMP[0].xyyy > 3: MOV TEMP[1].xy, TEMP[1].xyyy > 4: MOV TEMP[1].w, IMM[1].xxxx > 5: TXF TEMP[1], TEMP[1], SAMP[0], 2D > 6: MOV OUT[0], TEMP[1] > 7: END >radeonsi: Compiling shader 266 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = fmul float %25, %16 > %30 = fadd float %29, %26 > %31 = fptoui float %15 to i32 > %32 = fptoui float %30 to i32 > %33 = insertelement <4 x i32> undef, i32 %31, i32 0 > %34 = insertelement <4 x i32> %33, i32 %32, i32 1 > %35 = insertelement <4 x i32> %34, i32 0, i32 2 > %36 = call <4 x i32> @llvm.SI.image.load.mip.v4i32(<4 x i32> %35, <8 x i32> %28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %bc = bitcast <4 x i32> %36 to <4 x float> > %37 = extractelement <4 x float> %bc, i32 0 > %bc8 = bitcast <4 x i32> %36 to <4 x float> > %38 = extractelement <4 x float> %bc8, i32 1 > %bc9 = bitcast <4 x i32> %36 to <4 x float> > %39 = extractelement <4 x float> %bc9, i32 2 > %bc10 = bitcast <4 x i32> %36 to <4 x float> > %40 = extractelement <4 x float> %bc10, i32 3 > %41 = bitcast float %5 to i32 > %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %41, 10 > %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %37, 11 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float %38, 12 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 13 > %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, 
float }> %45, float %40, 14 > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x i32> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: 
UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, 
IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: 
MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, 
CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 
TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 267 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, 
!tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 
= fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 
= fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, 
i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > %323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, 
%205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) > %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float 
%463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = 
fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 
addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; 
Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 
63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, 
TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV 
TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy >283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, 
TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], 
TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 268 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = 
add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw 
nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 %45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 %10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add 
nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw 
nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 
= fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > %539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd 
float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 %627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float 
-0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. > br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, 
%767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float 
addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > %898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > 
%943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 %7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load 
float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > %1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 
%1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float %1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul 
float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = 
select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 > %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd 
float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast 
i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, 
%IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 
= phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV 
TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 269 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 
= bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load 
float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > 
%345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, 
float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 
> %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = bitcast i32 %10 to float > %711 = insertvalue <{ float, float, float }> undef, float %710, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, 
i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %711 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL SV[0], FACE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..26] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 384, 416, 400} >IMM[1] INT32 {-1, 0, 1, 0} >IMM[2] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[4] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: UIF SV[0].xxxx :0 > 1: MOV TEMP[0].x, IMM[1].xxxx > 2: ELSE :0 > 3: MOV TEMP[0].x, IMM[1].yyyy > 4: ENDIF > 5: ADD TEMP[1].x, CONST[1][24].yyyy, IMM[2].xxxx > 6: MOV TEMP[2].xy, IN[0].xyyy > 7: TEX TEMP[3].w, TEMP[2], SAMP[0], 2D > 8: ADD TEMP[3].xy, -TEMP[3].wwww, IMM[2].xyyy > 9: FMA TEMP[4].x, CONST[1][24].xxxx, TEMP[1].xxxx, TEMP[3].xxxx > 10: CEIL TEMP[3].x, TEMP[3].yyyy > 11: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 12: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].zzzz > 13: FSNE TEMP[4].x, CONST[1][24].yyyy, IMM[2].wwww > 14: UIF TEMP[4].xxxx :0 > 15: RCP TEMP[4].x, CONST[1][24].yyyy > 16: ELSE :0 > 17: MOV TEMP[4].x, IMM[3].xxxx > 18: ENDIF > 19: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx > 20: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 21: FMA TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy, IMM[3].zzzz > 22: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[4].xxxx > 23: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx > 24: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx > 25: FMA TEMP[1].x, -TEMP[3].xxxx, TEMP[1].xxxx, IMM[2].xxxx > 26: LG2 TEMP[3].x, TEMP[1].xxxx > 27: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][26].xxxx > 28: EX2 TEMP[3].x, TEMP[1].xxxx > 29: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][25].wwww > 30: MUL TEMP[3].xyz, TEMP[1].xxxx, CONST[1][25].xyzz > 31: MOV TEMP[5].xy, IN[0].xyyy > 32: TEX TEMP[5], TEMP[5], SAMP[1], 2D > 33: MOV TEMP[6].xyz, TEMP[5].xyzx > 34: FMA TEMP[4].x, TEMP[4].xxxx, TEMP[5].wwww, IMM[3].wwww > 35: FSLT 
TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww > 36: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz > 37: INEG TEMP[4].x, TEMP[4].xxxx > 38: USNE TEMP[2].x, TEMP[4].xxxx, IMM[0].xxxx > 39: AND TEMP[4].x, TEMP[2].xxxx, IMM[2].xxxx > 40: KILL_IF -TEMP[4].xxxx > 41: MOV TEMP[3].w, IMM[2].wwww > 42: MOV TEMP[4].xy, IN[0].xyyy > 43: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D > 44: FMA TEMP[4].xy, TEMP[4].ywww, IMM[4].xxxx, IMM[2].zzzz > 45: MOV TEMP[1].xy, TEMP[4].xyxx > 46: FMA TEMP[5].x, -TEMP[4].xxxx, TEMP[4].xxxx, IMM[2].xxxx > 47: FMA TEMP[4].x, -TEMP[4].yyyy, TEMP[4].yyyy, TEMP[5].xxxx > 48: SQRT TEMP[4].x, TEMP[4].xxxx > 49: MOV TEMP[1].z, TEMP[4].xxxx > 50: DP3 TEMP[2].x, IN[1].xyzz, TEMP[1].xyzz > 51: DP3 TEMP[4].x, IN[2].xyzz, TEMP[1].xyzz > 52: MOV TEMP[2].y, TEMP[4].xxxx > 53: DP3 TEMP[4].x, IN[3].xyzz, TEMP[1].xyzz > 54: MOV TEMP[2].z, TEMP[4].xxxx > 55: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[2].xyzz > 56: RSQ TEMP[4].x, TEMP[1].xxxx > 57: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[2].xyzz > 58: MOV TEMP[2].xyz, -TEMP[1].xyzx > 59: USNE TEMP[4].x, TEMP[0].xxxx, IMM[0].xxxx > 60: UIF TEMP[4].xxxx :0 > 61: MOV TEMP[4].x, TEMP[1].xxxx > 62: ELSE :0 > 63: MOV TEMP[4].x, TEMP[2].xxxx > 64: ENDIF > 65: MOV TEMP[4].x, TEMP[4].xxxx > 66: USNE TEMP[5].x, TEMP[0].xxxx, IMM[0].xxxx > 67: UIF TEMP[5].xxxx :0 > 68: MOV TEMP[5].x, TEMP[1].yyyy > 69: ELSE :0 > 70: MOV TEMP[5].x, TEMP[2].yyyy > 71: ENDIF > 72: MOV TEMP[4].y, TEMP[5].xxxx > 73: USNE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx > 74: UIF TEMP[0].xxxx :0 > 75: MOV TEMP[0].x, TEMP[1].zzzz > 76: ELSE :0 > 77: MOV TEMP[0].x, TEMP[2].zzzz > 78: ENDIF > 79: MOV TEMP[4].z, TEMP[0].xxxx > 80: FMA TEMP[2].xyz, TEMP[4].xyzz, IMM[4].yyyy, IMM[4].yyyy > 81: MOV TEMP[2].w, CONST[1][26].zzzz > 82: MOV TEMP[6].w, IMM[2].xxxx > 83: MOV TEMP[0].xy, IN[0].xyyy > 84: TEX TEMP[0], TEMP[0], SAMP[3], 2D > 85: MUL TEMP[1].x, TEMP[0].zzzz, CONST[1][26].yyyy > 86: MOV TEMP[1].yzw, TEMP[0].xyxw > 87: MOV OUT[0], TEMP[3] > 88: MOV OUT[1], TEMP[2] > 89: MOV OUT[2], TEMP[6] > 90: MOV OUT[3], TEMP[1] > 91: END >radeonsi: Compiling shader 270 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 
= load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 11 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 15 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %81 = icmp ne i32 %19, 0 > %. 
= select i1 %81, float 0xFFFFFFFFE0000000, float 0.000000e+00 > %82 = fadd float %26, 1.000000e+00 > %83 = bitcast float %70 to i32 > %84 = bitcast float %71 to i32 > %85 = insertelement <2 x i32> undef, i32 %83, i32 0 > %86 = insertelement <2 x i32> %85, i32 %84, i32 1 > %87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %86, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %88 = extractelement <4 x float> %87, i32 3 > %89 = fsub float 1.000000e+00, %88 > %90 = fsub float 0x3FEFD70A40000000, %88 > %91 = call float @llvm.fma.f32(float %25, float %82, float %89) > %92 = call float @llvm.ceil.f32(float %90) > %93 = call float @llvm.AMDGPU.clamp.(float %92, float 0.000000e+00, float 1.000000e+00) > %94 = fadd float %91, -1.000000e+00 > %95 = fcmp une float %26, 0.000000e+00 > %96 = fdiv float 1.000000e+00, %26 > %temp16.0 = select i1 %95, float %96, float 0x4600000000000000 > %97 = fmul float %temp16.0, %94 > %98 = call float @llvm.AMDGPU.clamp.(float %97, float 0.000000e+00, float 1.000000e+00) > %99 = call float @llvm.fma.f32(float %98, float -2.000000e+00, float 3.000000e+00) > %100 = fmul float %98, %98 > %101 = fmul float %100, %99 > %102 = fmul float %101, %93 > %103 = fsub float -0.000000e+00, %93 > %104 = call float @llvm.fma.f32(float %103, float %101, float 1.000000e+00) > %105 = call float @llvm.log2.f32(float %104) > %106 = fmul float %105, %31 > %107 = call float @llvm.exp2.f32(float %106) > %108 = fmul float %107, %30 > %109 = fmul float %108, %27 > %110 = fmul float %108, %28 > %111 = fmul float %108, %29 > %112 = bitcast float %70 to i32 > %113 = bitcast float %71 to i32 > %114 = insertelement <2 x i32> undef, i32 %112, i32 0 > %115 = insertelement <2 x i32> %114, i32 %113, i32 1 > %116 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %115, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %117 = extractelement <4 x float> %116, i32 0 > %118 = extractelement <4 x float> %116, i32 1 > %119 = extractelement <4 x float> %116, i32 2 > %120 = extractelement <4 x float> %116, i32 3 > %121 = call float @llvm.fma.f32(float %102, float %120, float 0xBFD8181820000000) > %122 = fcmp olt float %121, 0.000000e+00 > %123 = select i1 %122, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %123) > %124 = bitcast float %70 to i32 > %125 = bitcast float %71 to i32 > %126 = insertelement <2 x i32> undef, i32 %124, i32 0 > %127 = insertelement <2 x i32> %126, i32 %125, i32 1 > %128 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %127, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %129 = extractelement <4 x float> %128, i32 1 > %130 = extractelement <4 x float> %128, i32 3 > %131 = call float @llvm.fma.f32(float %129, float 2.000000e+00, float -1.000000e+00) > %132 = call float @llvm.fma.f32(float %130, float 2.000000e+00, float -1.000000e+00) > %133 = fsub float -0.000000e+00, %131 > %134 = call float @llvm.fma.f32(float %133, float %131, float 1.000000e+00) > %135 = fsub float -0.000000e+00, %132 > %136 = call float @llvm.fma.f32(float %135, float %132, float %134) > %137 = call float @llvm.sqrt.f32(float %136) > %138 = fmul float %72, %131 > %139 = fmul float %73, %132 > %140 = fadd float %139, %138 > %141 = fmul float %74, %137 > %142 = fadd float %140, %141 > %143 = fmul float %75, %131 > %144 = fmul float %76, %132 > %145 = fadd float %144, %143 > %146 = fmul float %77, %137 > %147 = fadd float %145, %146 > %148 = fmul 
float %78, %131 > %149 = fmul float %79, %132 > %150 = fadd float %149, %148 > %151 = fmul float %80, %137 > %152 = fadd float %150, %151 > %153 = fmul float %142, %142 > %154 = fmul float %147, %147 > %155 = fadd float %154, %153 > %156 = fmul float %152, %152 > %157 = fadd float %155, %156 > %158 = call float @llvm.AMDGPU.rsq.clamped.f32(float %157) > %159 = fmul float %158, %142 > %160 = fmul float %158, %147 > %161 = fmul float %158, %152 > %162 = fsub float -0.000000e+00, %159 > %163 = fsub float -0.000000e+00, %160 > %164 = fsub float -0.000000e+00, %161 > %165 = bitcast float %. to i32 > %166 = icmp ne i32 %165, 0 > %.40 = select i1 %166, float %159, float %162 > %167 = bitcast float %. to i32 > %168 = icmp ne i32 %167, 0 > %temp20.0 = select i1 %168, float %160, float %163 > %169 = bitcast float %. to i32 > %170 = icmp ne i32 %169, 0 > %.41 = select i1 %170, float %161, float %164 > %171 = call float @llvm.fma.f32(float %.40, float 5.000000e-01, float 5.000000e-01) > %172 = call float @llvm.fma.f32(float %temp20.0, float 5.000000e-01, float 5.000000e-01) > %173 = call float @llvm.fma.f32(float %.41, float 5.000000e-01, float 5.000000e-01) > %174 = bitcast float %70 to i32 > %175 = bitcast float %71 to i32 > %176 = insertelement <2 x i32> undef, i32 %174, i32 0 > %177 = insertelement <2 x i32> %176, i32 %175, i32 1 > %178 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %177, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %179 = extractelement <4 x float> %178, i32 0 > %180 = extractelement <4 x float> %178, i32 1 > %181 = extractelement <4 x float> %178, i32 2 > %182 = extractelement <4 x float> %178, i32 3 > %183 = fmul float %181, %32 > %184 = bitcast float %5 to i32 > %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %184, 10 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float %109, 11 > %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float %110, 12 > %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float %111, 13 > %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float 0.000000e+00, 14 > %190 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189, float %171, 15 > %191 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %190, float %172, 16 > %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %191, float %173, 17 > %193 = insertvalue <{ 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192, float %33, 18 > %194 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %117, 19 > %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194, float %118, 20 > %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %119, 21 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float 1.000000e+00, 22 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197, float %183, 23 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %180, 24 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %179, 25 > %201 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200, float %182, 26 > %202 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %201, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %202 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors 
= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: MOV TEMP[2].xy, IN[2].xyxx > 8: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 9: MOV TEMP[0].w, IN[5].wwww > 10: MUL TEMP[0], TEMP[0], CONST[1][10] > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 12: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 13: MOV TEMP[3].y, TEMP[4].xxxx > 14: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 15: MOV TEMP[3].z, TEMP[4].xxxx > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 17: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 18: MOV TEMP[4].y, TEMP[5].xxxx > 19: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 20: MOV TEMP[4].z, TEMP[5].xxxx > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 22: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 23: MOV TEMP[5].y, TEMP[6].xxxx > 24: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 25: MOV TEMP[5].z, TEMP[6].xxxx > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[0] > 30: MOV OUT[1], TEMP[2] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 271 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 124) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 140) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 156) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = load <16 x i8>, <16 x i8> 
addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = extractelement <4 x float> %45, i32 2 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %15) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %16) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %17) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %18) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %21, %40 > %72 = fmul float %22, %41 > %73 = fadd float %71, %72 > %74 = fmul float %23, %42 > %75 = fadd float %73, %74 > %76 = fadd float %75, %24 > %77 = fmul float %25, %40 > %78 = fmul float %26, %41 > %79 = fadd float %77, %78 > %80 = fmul float %27, %42 > %81 = fadd float %79, %80 > %82 = fadd float %81, %28 > %83 = fmul float %29, %40 > %84 = fmul float %30, %41 > %85 = fadd float %83, %84 > %86 = fmul float %31, %42 > %87 = fadd float %85, %86 > %88 = fadd float %87, %32 > %89 = fmul float %70, %67 > %90 = fmul float %70, %68 > %91 = fmul float %70, %69 > %92 = fmul float %89, %33 > %93 = fmul float %90, %34 > %94 = fmul float %91, %35 > %95 = fmul float %70, %36 > %96 = fmul float %21, %55 > %97 = fmul float %22, %56 > %98 = fadd float %97, %96 > %99 = fmul float %23, %57 > %100 = fadd float %98, %99 > %101 = fmul float %21, %61 > %102 = fmul float %22, %62 > %103 = fadd float %102, %101 > %104 = fmul float %23, %63 > %105 = fadd float %103, %104 > %106 = fmul float %21, %46 > %107 = fmul float %22, %47 > %108 = fadd float %107, %106 > %109 = fmul float %23, %48 > %110 = fadd float %108, %109 > %111 = fmul float %25, %55 > %112 = fmul float %26, %56 > %113 = fadd float %112, %111 > %114 = fmul float %27, %57 > %115 = fadd float %113, %114 > %116 = fmul float %25, %61 > %117 = fmul float %26, %62 > %118 = fadd float %117, %116 > %119 = fmul float %27, %63 > %120 = fadd float %118, %119 > %121 = fmul float %25, %46 > %122 = fmul float %26, %47 > %123 = 
fadd float %122, %121 > %124 = fmul float %27, %48 > %125 = fadd float %123, %124 > %126 = fmul float %29, %55 > %127 = fmul float %30, %56 > %128 = fadd float %127, %126 > %129 = fmul float %31, %57 > %130 = fadd float %128, %129 > %131 = fmul float %29, %61 > %132 = fmul float %30, %62 > %133 = fadd float %132, %131 > %134 = fmul float %31, %63 > %135 = fadd float %133, %134 > %136 = fmul float %29, %46 > %137 = fmul float %30, %47 > %138 = fadd float %137, %136 > %139 = fmul float %31, %48 > %140 = fadd float %138, %139 > %141 = lshr i32 %8, 13 > %142 = and i32 %141, 255 > %143 = mul i32 %142, %10 > %144 = add i32 %143, 16 > %145 = sext i32 %144 to i64 > %146 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %145 > %147 = bitcast i32 addrspace(3)* %146 to float addrspace(3)* > store float %76, float addrspace(3)* %147, align 4 > %148 = add i32 %143, 17 > %149 = sext i32 %148 to i64 > %150 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %149 > %151 = bitcast i32 addrspace(3)* %150 to float addrspace(3)* > store float %82, float addrspace(3)* %151, align 4 > %152 = add i32 %143, 18 > %153 = sext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = bitcast i32 addrspace(3)* %154 to float addrspace(3)* > store float %88, float addrspace(3)* %155, align 4 > %156 = add i32 %143, 20 > %bc = bitcast <4 x float> %51 to <4 x i32> > %157 = extractelement <4 x i32> %bc, i32 0 > %158 = sext i32 %156 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %157, i32 addrspace(3)* %159, align 4 > %160 = add i32 %143, 21 > %bc28 = bitcast <4 x float> %51 to <4 x i32> > %161 = extractelement <4 x i32> %bc28, i32 1 > %162 = sext i32 %160 to i64 > %163 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %162 > store i32 %161, i32 addrspace(3)* %163, align 4 > %164 = add i32 %143, 24 > %165 = sext i32 %164 to i64 > %166 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %165 > %167 = bitcast i32 addrspace(3)* %166 to float addrspace(3)* > store float %92, float addrspace(3)* %167, align 4 > %168 = add i32 %143, 25 > %169 = sext i32 %168 to i64 > %170 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %169 > %171 = bitcast i32 addrspace(3)* %170 to float addrspace(3)* > store float %93, float addrspace(3)* %171, align 4 > %172 = add i32 %143, 26 > %173 = sext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > %175 = bitcast i32 addrspace(3)* %174 to float addrspace(3)* > store float %94, float addrspace(3)* %175, align 4 > %176 = add i32 %143, 27 > %177 = sext i32 %176 to i64 > %178 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %177 > %179 = bitcast i32 addrspace(3)* %178 to float addrspace(3)* > store float %95, float addrspace(3)* %179, align 4 > %180 = add i32 %143, 28 > %181 = sext i32 %180 to i64 > %182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %181 > %183 = bitcast i32 addrspace(3)* %182 to float addrspace(3)* > store float %100, float addrspace(3)* %183, align 4 > %184 = add i32 %143, 29 > %185 = sext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > %187 = bitcast i32 addrspace(3)* %186 to float addrspace(3)* > store float %105, float addrspace(3)* %187, align 4 
> %188 = add i32 %143, 30 > %189 = sext i32 %188 to i64 > %190 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %189 > %191 = bitcast i32 addrspace(3)* %190 to float addrspace(3)* > store float %110, float addrspace(3)* %191, align 4 > %192 = add i32 %143, 32 > %193 = sext i32 %192 to i64 > %194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %193 > %195 = bitcast i32 addrspace(3)* %194 to float addrspace(3)* > store float %115, float addrspace(3)* %195, align 4 > %196 = add i32 %143, 33 > %197 = sext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = bitcast i32 addrspace(3)* %198 to float addrspace(3)* > store float %120, float addrspace(3)* %199, align 4 > %200 = add i32 %143, 34 > %201 = sext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = bitcast i32 addrspace(3)* %202 to float addrspace(3)* > store float %125, float addrspace(3)* %203, align 4 > %204 = add i32 %143, 36 > %205 = sext i32 %204 to i64 > %206 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %205 > %207 = bitcast i32 addrspace(3)* %206 to float addrspace(3)* > store float %130, float addrspace(3)* %207, align 4 > %208 = add i32 %143, 37 > %209 = sext i32 %208 to i64 > %210 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %209 > %211 = bitcast i32 addrspace(3)* %210 to float addrspace(3)* > store float %135, float addrspace(3)* %211, align 4 > %212 = add i32 %143, 38 > %213 = sext i32 %212 to i64 > %214 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %213 > %215 = bitcast i32 addrspace(3)* %214 to float addrspace(3)* > store float %140, float addrspace(3)* %215, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, 0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 64, 80} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {96, 368, 352, 0} >IMM[5] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], 
TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: MOV TEMP[1].w, TEMP[8].xxxx > 66: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 67: MOV TEMP[1].z, TEMP[2].xxxx > 68: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 69: MOV TEMP[0].yw, TEMP[2].yxyy > 70: ABS TEMP[2].x, TEMP[3].xxxx > 71: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 72: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 73: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 74: INEG TEMP[9].xy, TEMP[9].xyyy > 75: MOV TEMP[4].yz, TEMP[9].yxyy > 76: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 77: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 78: INEG TEMP[9].xy, TEMP[9].xyyy > 79: MOV TEMP[5].zw, TEMP[9].yyxy > 80: INEG TEMP[9].xy, TEMP[4].yzzz > 81: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 82: MOV TEMP[4].yz, TEMP[9].yxyy > 83: I2F TEMP[9].xy, TEMP[4].yzzz > 84: MOV TEMP[4].yz, TEMP[9].yxyy > 85: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 86: ABS TEMP[2].x, TEMP[6].xxxx > 87: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 88: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 89: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 90: INEG TEMP[9].xy, TEMP[9].xyyy > 91: MOV TEMP[4].yz, TEMP[9].yxyy > 92: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 93: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 94: INEG TEMP[9].xy, TEMP[9].xyyy > 95: MOV TEMP[5].zw, TEMP[9].yyxy > 96: INEG TEMP[9].xy, TEMP[4].yzzz > 97: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 98: MOV TEMP[4].yz, TEMP[9].yxyy > 99: I2F TEMP[9].xy, TEMP[4].yzzz >100: MOV TEMP[4].yz, TEMP[9].yxyy >101: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >102: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >103: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >104: INEG TEMP[2].xy, TEMP[2].xyyy >105: MOV TEMP[5].xy, TEMP[2].xyxx >106: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >107: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >108: 
INEG TEMP[2].xy, TEMP[2].xyyy >109: MOV TEMP[5].zw, TEMP[2].yyxy >110: INEG TEMP[2].xy, TEMP[5].xyyy >111: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >112: MOV TEMP[5].xy, TEMP[2].xyxx >113: I2F TEMP[5].xy, TEMP[5].xyyy >114: ABS TEMP[2].x, TEMP[8].xxxx >115: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >116: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >117: MOV TEMP[4].zw, TEMP[2].yyxy >118: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >119: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >120: INEG TEMP[2].xy, TEMP[2].xyyy >121: MOV TEMP[5].xy, TEMP[2].xyxx >122: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >123: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >124: INEG TEMP[2].xy, TEMP[2].xyyy >125: MOV TEMP[5].zw, TEMP[2].yyxy >126: AND TEMP[2], TEMP[5], IMM[2].yyyy >127: MOV TEMP[2], TEMP[2] >128: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >129: MOV TEMP[5].xy, TEMP[2].xyxx >130: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >131: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >132: INEG TEMP[2].xy, TEMP[2].xyyy >133: MOV TEMP[5].zw, TEMP[2].yyxy >134: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >135: MOV TEMP[5].zw, TEMP[2].yyxy >136: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >137: MOV TEMP[5].xy, TEMP[2].xyxx >138: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >139: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >140: INEG TEMP[2].x, TEMP[2].xxxx >141: MOV TEMP[1].z, TEMP[2].xxxx >142: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >143: MOV TEMP[1].z, TEMP[2].xxxx >144: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >145: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >146: INEG TEMP[2].xy, TEMP[2].xyyy >147: MOV TEMP[0].yw, TEMP[2].yxyy >148: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >149: MOV TEMP[0].yw, TEMP[2].yxyy >150: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >151: MOV TEMP[0].y, TEMP[2].xxxx >152: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >153: MOV TEMP[0].y, TEMP[2].xxxx >154: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >155: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >156: INEG TEMP[2].xy, TEMP[2].xyyy >157: MOV TEMP[0].xw, TEMP[2].xxxy >158: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >159: MOV TEMP[0].xw, TEMP[2].xxxy >160: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >161: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >162: INEG TEMP[2].xy, TEMP[2].xyyy >163: MOV TEMP[1].xy, TEMP[2].xyxx >164: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >165: MOV TEMP[1].xy, TEMP[2].xyxx >166: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >167: MOV TEMP[0].xz, TEMP[2].xxyx >168: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >169: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >170: INEG TEMP[2].xy, TEMP[2].xyyy >171: MOV TEMP[1].xy, TEMP[2].xyxx >172: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >173: MOV TEMP[1].xy, TEMP[2].xyxx >174: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >175: MOV TEMP[0].xz, TEMP[2].xxyx >176: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >177: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >178: INEG TEMP[2].xy, TEMP[2].xyyy >179: MOV TEMP[1].xy, TEMP[2].xyxx >180: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >181: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >182: INEG TEMP[2].xyz, TEMP[2].xyzz >183: MOV TEMP[0].xyz, TEMP[2].xyzx >184: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >185: MOV TEMP[0].xz, TEMP[2].xxyx >186: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >187: MOV TEMP[0].x, TEMP[2].xxxx >188: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >189: MOV TEMP[0].x, TEMP[2].xxxx >190: MOV TEMP[2].x, TEMP[0].xxxx >191: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >192: UIF TEMP[2].xxxx :0 >193: ADD TEMP[0].xyz, -IN[1][0].xyzz, 
IN[0][0].xyzz >194: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >195: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >196: MOV TEMP[0].yzw, TEMP[2].yxyz >197: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >198: MOV TEMP[0].y, TEMP[2].xxxx >199: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >200: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >201: MOV TEMP[0].z, TEMP[2].xxxx >202: SQRT TEMP[2].x, TEMP[0].xxxx >203: SQRT TEMP[2].y, TEMP[0].yyyy >204: SQRT TEMP[2].z, TEMP[0].zzzz >205: MOV TEMP[0].xyz, TEMP[2].xyzx >206: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >207: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].wwww >208: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >209: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww >210: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >211: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[0].wwww >212: MOV TEMP[2].y, CONST[3][4] >213: MOV TEMP[7].x, TEMP[2].yyyy >214: MOV TEMP[2].y, CONST[3][5] >215: MOV TEMP[7].y, TEMP[2].yyyy >216: MOV TEMP[2].y, CONST[3][6] >217: MOV TEMP[7].z, TEMP[2].yyyy >218: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >219: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >220: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >221: MOV TEMP[1].w, IMM[0].xxxx >222: MOV TEMP[6], CONST[3][0] >223: DP4 TEMP[7].x, TEMP[6], TEMP[1] >224: MOV TEMP[6], CONST[3][1] >225: DP4 TEMP[6].x, TEMP[6], TEMP[1] >226: MOV TEMP[7].y, TEMP[6].xxxx >227: MOV TEMP[6], CONST[3][3] >228: DP4 TEMP[6].x, TEMP[6], TEMP[1] >229: MOV TEMP[4].w, IMM[0].xxxx >230: MOV TEMP[8], CONST[3][0] >231: DP4 TEMP[8].x, TEMP[8], TEMP[4] >232: MOV TEMP[9], CONST[3][1] >233: DP4 TEMP[9].x, TEMP[9], TEMP[4] >234: MOV TEMP[8].y, TEMP[9].xxxx >235: MOV TEMP[9], CONST[3][3] >236: DP4 TEMP[9].x, TEMP[9], TEMP[4] >237: MOV TEMP[5].w, IMM[0].xxxx >238: MOV TEMP[10], CONST[3][0] >239: DP4 TEMP[4].x, TEMP[10], TEMP[5] >240: MOV TEMP[10], CONST[3][1] >241: DP4 TEMP[10].x, TEMP[10], TEMP[5] >242: MOV TEMP[4].y, TEMP[10].xxxx >243: MOV TEMP[10], CONST[3][3] >244: DP4 TEMP[10].x, TEMP[10], TEMP[5] >245: MOV TEMP[2].w, IMM[0].xxxx >246: MOV TEMP[11], CONST[3][0] >247: DP4 TEMP[5].x, TEMP[11], TEMP[2] >248: MOV TEMP[11], CONST[3][1] >249: DP4 TEMP[11].x, TEMP[11], TEMP[2] >250: MOV TEMP[5].y, TEMP[11].xxxx >251: MOV TEMP[11], CONST[3][3] >252: DP4 TEMP[11].x, TEMP[11], TEMP[2] >253: MOV TEMP[3].w, IMM[0].xxxx >254: MOV TEMP[12], CONST[3][0] >255: DP4 TEMP[2].x, TEMP[12], TEMP[3] >256: MOV TEMP[12], CONST[3][1] >257: DP4 TEMP[12].x, TEMP[12], TEMP[3] >258: MOV TEMP[2].y, TEMP[12].xxxx >259: MOV TEMP[12], CONST[3][3] >260: DP4 TEMP[12].x, TEMP[12], TEMP[3] >261: MOV TEMP[0].w, IMM[0].xxxx >262: MOV TEMP[13], CONST[3][0] >263: DP4 TEMP[3].x, TEMP[13], TEMP[0] >264: MOV TEMP[13], CONST[3][1] >265: DP4 TEMP[13].x, TEMP[13], TEMP[0] >266: MOV TEMP[3].y, TEMP[13].xxxx >267: MOV TEMP[13], CONST[3][3] >268: DP4 TEMP[13].x, TEMP[13], TEMP[0] >269: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >270: SSG TEMP[15].xy, TEMP[8].xyyy >271: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >272: RCP TEMP[16].xy, TEMP[9].xxxx >273: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >274: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >275: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >276: SSG TEMP[15].xy, TEMP[4].xyyy >277: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >278: RCP TEMP[16].xy, TEMP[10].xxxx >279: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >280: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >281: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >282: SSG TEMP[16].xy, TEMP[5].xyyy 
>283: MUL TEMP[16].xy, IMM[5].xxxx, TEMP[16].xyyy >284: RCP TEMP[11].xy, TEMP[11].xxxx >285: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >286: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >287: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >288: SSG TEMP[15].xy, TEMP[7].xyyy >289: MUL TEMP[15].xy, IMM[5].xxxx, TEMP[15].xyyy >290: RCP TEMP[16].xy, TEMP[6].xxxx >291: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >292: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >293: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >294: MOV TEMP[0].yz, TEMP[5].yxyy >295: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >296: SSG TEMP[7].xy, TEMP[2].xyyy >297: MUL TEMP[7].xy, IMM[5].xxxx, TEMP[7].xyyy >298: RCP TEMP[11].xy, TEMP[12].xxxx >299: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >300: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >301: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >302: MOV TEMP[4].zw, TEMP[2].yyxy >303: MOV TEMP[2].xy, CONST[3][23] >304: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >305: MOV TEMP[4].zw, TEMP[2].yyxy >306: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >307: SSG TEMP[5].xy, TEMP[3].xyyy >308: MUL TEMP[5].xy, IMM[5].xxxx, TEMP[5].xyyy >309: RCP TEMP[7].xy, TEMP[13].xxxx >310: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >311: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >312: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >313: MOV TEMP[0].xw, TEMP[2].xxxy >314: MOV TEMP[2].xy, CONST[3][23] >315: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >316: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >317: MOV TEMP[0].y, TEMP[2].xxxx >318: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >319: MOV TEMP[0].z, TEMP[2].xxxx >320: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >321: SQRT TEMP[2].x, TEMP[0].xxxx >322: SQRT TEMP[2].y, TEMP[0].yyyy >323: SQRT TEMP[2].z, TEMP[0].zzzz >324: MOV TEMP[2].xyz, TEMP[2].xyzx >325: MOV TEMP[3].z, CONST[1][22] >326: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >327: MOV TEMP[0].w, TEMP[3].xxxx >328: MOV TEMP[3].z, CONST[1][22] >329: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >330: MOV TEMP[3].z, CONST[1][22] >331: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >332: MOV TEMP[1].y, TEMP[3].xxxx >333: MOV TEMP[3].w, CONST[1][22] >334: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >335: UIF TEMP[3].xxxx :0 >336: MOV TEMP[3].w, CONST[1][22] >337: RCP TEMP[3].x, TEMP[3].wwww >338: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >339: ELSE :0 >340: SSG TEMP[5].x, TEMP[0].wwww >341: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >342: ENDIF >343: MOV_SAT TEMP[3].x, TEMP[3].xxxx >344: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >345: MOV TEMP[0].w, TEMP[3].xxxx >346: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >347: MOV TEMP[0].y, TEMP[3].xxxx >348: MOV TEMP[3].w, CONST[1][22] >349: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >350: UIF TEMP[3].xxxx :0 >351: MOV TEMP[3].w, CONST[1][22] >352: RCP TEMP[3].x, TEMP[3].wwww >353: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >354: ELSE :0 >355: SSG TEMP[5].x, TEMP[1].xxxx >356: MUL TEMP[3].x, IMM[5].xxxx, TEMP[5].xxxx >357: ENDIF >358: MOV_SAT TEMP[3].x, TEMP[3].xxxx >359: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >360: MOV TEMP[0].w, TEMP[3].xxxx >361: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >362: MOV TEMP[0].z, TEMP[3].xxxx >363: MOV TEMP[3].w, CONST[1][22] >364: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >365: UIF TEMP[3].xxxx :0 >366: MOV TEMP[3].w, CONST[1][22] >367: RCP TEMP[3].x, TEMP[3].wwww >368: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >369: ELSE :0 >370: SSG TEMP[5].x, TEMP[1].yyyy >371: MUL TEMP[3].x, IMM[5].xxxx, 
TEMP[5].xxxx >372: ENDIF >373: MOV_SAT TEMP[3].x, TEMP[3].xxxx >374: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >375: MOV TEMP[0].w, TEMP[3].xxxx >376: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >377: MOV TEMP[2].xy, CONST[1][22] >378: MOV TEMP[3].xy, CONST[2][4] >379: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >380: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >381: MOV TEMP[0].w, TEMP[2].xxxx >382: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >383: SSG TEMP[3].xy, TEMP[0].xyyy >384: MUL TEMP[3].xy, IMM[5].xxxx, TEMP[3].xyyy >385: RCP TEMP[5].xy, TEMP[1].xxxx >386: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >387: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >388: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >389: MOV TEMP[0].y, TEMP[2].xxxx >390: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >391: MOV TEMP[4].z, TEMP[2].xxxx >392: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >393: UIF TEMP[2].xxxx :0 >394: RCP TEMP[1].x, TEMP[1].xxxx >395: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >396: ELSE :0 >397: SSG TEMP[2].x, TEMP[0].zzzz >398: MUL TEMP[1].x, IMM[5].xxxx, TEMP[2].xxxx >399: ENDIF >400: MOV TEMP[0].y, TEMP[1].xxxx >401: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >402: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >403: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >404: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >405: MOV TEMP[4].w, TEMP[0].xxxx >406: ELSE :0 >407: MOV TEMP[4], IMM[0].zzzz >408: ENDIF >409: MIN TEMP[0], TEMP[4], IMM[5].yyyy >410: MOV TEMP[1].x, TEMP[0].xxxx >411: MOV TEMP[2].x, TEMP[0].yyyy >412: MOV TEMP[3].x, TEMP[0].zzzz >413: MOV TEMP[0].x, TEMP[0].wwww >414: MOV OUT[8], TEMP[1] >415: MOV OUT[9], TEMP[2] >416: MOV OUT[10], TEMP[3] >417: MOV OUT[11], TEMP[0] >418: MOV OUT[0].x, TEMP[1].xxxx >419: MOV OUT[0].y, TEMP[2].xxxx >420: MOV OUT[0].z, TEMP[3].xxxx >421: MOV OUT[1].x, TEMP[0].xxxx >422: END >radeonsi: Compiling shader 272 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 64) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 68) > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x 
i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 84) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %44 = lshr i32 %10, 8 > %45 = and i32 %44, 31 > %46 = lshr i32 %7, 13 > %47 = and i32 %46, 255 > %48 = and i32 %7, 8191 > %49 = and i32 %10, 255 > %50 = mul nuw nsw i32 %48, %49 > %51 = mul nuw nsw i32 %45, %47 > %52 = add nuw nsw i32 %50, %51 > %53 = add nuw nsw i32 %52, 16 > %54 = zext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = load i32, i32 addrspace(3)* %55, align 4 > %57 = lshr i32 %7, 13 > %58 = and i32 %57, 255 > %59 = and i32 %7, 8191 > %60 = and i32 %10, 255 > %61 = mul nuw nsw i32 %59, %60 > %62 = mul nuw nsw i32 %45, %58 > %63 = add nuw nsw i32 %61, %62 > %64 = add nuw nsw i32 %63, 17 > %65 = zext i32 %64 to i64 > %66 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %65 > %67 = load i32, i32 addrspace(3)* %66, align 4 > %68 = lshr i32 %7, 13 > %69 = and i32 %68, 255 > %70 = and i32 %7, 8191 > %71 = and i32 %10, 255 > %72 = mul nuw nsw i32 %70, %71 > %73 = mul nuw nsw i32 %45, %69 > %74 = add nuw nsw i32 %72, %73 > %75 = add nuw nsw i32 %74, 18 > %76 = zext i32 %75 to i64 > %77 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %76 > %78 = load i32, i32 addrspace(3)* %77, align 4 > %79 = lshr i32 %7, 13 > %80 = and i32 %79, 255 > %81 = and i32 %7, 8191 > %82 = and i32 %10, 255 > %83 = mul nuw nsw i32 %81, %82 > %84 = mul nuw nsw i32 %45, %80 > %85 = add nuw nsw i32 %83, %84 > %86 = add nuw nsw i32 %85, 19 > %87 = zext i32 %86 to i64 > %88 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %87 > %89 = load i32, i32 addrspace(3)* %88, align 4 > %90 = lshr i32 %6, 13 > %91 = and i32 %90, 255 > %92 = shl i32 %5, 2 > %93 = and i32 %92, 262140 > %94 = and i32 %6, 8191 > %95 = and i32 %10, 255 > %96 = mul nuw nsw i32 %94, %95 > %97 = add nuw nsw i32 %93, %96 > %98 = mul nuw nsw i32 %45, %91 > %99 = add nuw nsw i32 %97, %98 > %100 = add nuw nsw i32 %99, 16 > %101 = zext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > store i32 %56, i32 addrspace(3)* %102, align 4 > %103 = add nuw nsw i32 %99, 17 > %104 = zext i32 %103 to i64 > %105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %104 > store i32 %67, i32 addrspace(3)* %105, align 4 > %106 = add nuw nsw i32 %99, 18 > %107 = zext i32 %106 to i64 > %108 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %107 > store i32 %78, i32 addrspace(3)* %108, align 4 > %109 = add nuw nsw 
i32 %99, 19 > %110 = zext i32 %109 to i64 > %111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %110 > store i32 %89, i32 addrspace(3)* %111, align 4 > %112 = lshr i32 %7, 13 > %113 = and i32 %112, 255 > %114 = and i32 %7, 8191 > %115 = and i32 %10, 255 > %116 = mul nuw nsw i32 %114, %115 > %117 = mul nuw nsw i32 %45, %113 > %118 = add nuw nsw i32 %116, %117 > %119 = add nuw nsw i32 %118, 20 > %120 = zext i32 %119 to i64 > %121 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %120 > %122 = load i32, i32 addrspace(3)* %121, align 4 > %123 = lshr i32 %7, 13 > %124 = and i32 %123, 255 > %125 = and i32 %7, 8191 > %126 = and i32 %10, 255 > %127 = mul nuw nsw i32 %125, %126 > %128 = mul nuw nsw i32 %45, %124 > %129 = add nuw nsw i32 %127, %128 > %130 = add nuw nsw i32 %129, 21 > %131 = zext i32 %130 to i64 > %132 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %131 > %133 = load i32, i32 addrspace(3)* %132, align 4 > %134 = lshr i32 %7, 13 > %135 = and i32 %134, 255 > %136 = and i32 %7, 8191 > %137 = and i32 %10, 255 > %138 = mul nuw nsw i32 %136, %137 > %139 = mul nuw nsw i32 %45, %135 > %140 = add nuw nsw i32 %138, %139 > %141 = add nuw nsw i32 %140, 22 > %142 = zext i32 %141 to i64 > %143 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %142 > %144 = load i32, i32 addrspace(3)* %143, align 4 > %145 = lshr i32 %7, 13 > %146 = and i32 %145, 255 > %147 = and i32 %7, 8191 > %148 = and i32 %10, 255 > %149 = mul nuw nsw i32 %147, %148 > %150 = mul nuw nsw i32 %45, %146 > %151 = add nuw nsw i32 %149, %150 > %152 = add nuw nsw i32 %151, 23 > %153 = zext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = load i32, i32 addrspace(3)* %154, align 4 > %156 = lshr i32 %6, 13 > %157 = and i32 %156, 255 > %158 = shl i32 %5, 2 > %159 = and i32 %158, 262140 > %160 = and i32 %6, 8191 > %161 = and i32 %10, 255 > %162 = mul nuw nsw i32 %160, %161 > %163 = add nuw nsw i32 %159, %162 > %164 = mul nuw nsw i32 %45, %157 > %165 = add nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 20 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > store i32 %122, i32 addrspace(3)* %168, align 4 > %169 = add nuw nsw i32 %165, 21 > %170 = zext i32 %169 to i64 > %171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %170 > store i32 %133, i32 addrspace(3)* %171, align 4 > %172 = add nuw nsw i32 %165, 22 > %173 = zext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > store i32 %144, i32 addrspace(3)* %174, align 4 > %175 = add nuw nsw i32 %165, 23 > %176 = zext i32 %175 to i64 > %177 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %176 > store i32 %155, i32 addrspace(3)* %177, align 4 > %178 = lshr i32 %7, 13 > %179 = and i32 %178, 255 > %180 = and i32 %7, 8191 > %181 = and i32 %10, 255 > %182 = mul nuw nsw i32 %180, %181 > %183 = mul nuw nsw i32 %45, %179 > %184 = add nuw nsw i32 %182, %183 > %185 = add nuw nsw i32 %184, 24 > %186 = zext i32 %185 to i64 > %187 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %186 > %188 = load i32, i32 addrspace(3)* %187, align 4 > %189 = lshr i32 %7, 13 > %190 = and i32 %189, 255 > %191 = and i32 %7, 8191 > %192 = and i32 %10, 255 > %193 = mul nuw nsw i32 %191, %192 > %194 = mul nuw nsw i32 
%45, %190 > %195 = add nuw nsw i32 %193, %194 > %196 = add nuw nsw i32 %195, 25 > %197 = zext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = load i32, i32 addrspace(3)* %198, align 4 > %200 = lshr i32 %7, 13 > %201 = and i32 %200, 255 > %202 = and i32 %7, 8191 > %203 = and i32 %10, 255 > %204 = mul nuw nsw i32 %202, %203 > %205 = mul nuw nsw i32 %45, %201 > %206 = add nuw nsw i32 %204, %205 > %207 = add nuw nsw i32 %206, 26 > %208 = zext i32 %207 to i64 > %209 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %208 > %210 = load i32, i32 addrspace(3)* %209, align 4 > %211 = lshr i32 %7, 13 > %212 = and i32 %211, 255 > %213 = and i32 %7, 8191 > %214 = and i32 %10, 255 > %215 = mul nuw nsw i32 %213, %214 > %216 = mul nuw nsw i32 %45, %212 > %217 = add nuw nsw i32 %215, %216 > %218 = add nuw nsw i32 %217, 27 > %219 = zext i32 %218 to i64 > %220 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %219 > %221 = load i32, i32 addrspace(3)* %220, align 4 > %222 = lshr i32 %6, 13 > %223 = and i32 %222, 255 > %224 = shl i32 %5, 2 > %225 = and i32 %224, 262140 > %226 = and i32 %6, 8191 > %227 = and i32 %10, 255 > %228 = mul nuw nsw i32 %226, %227 > %229 = add nuw nsw i32 %225, %228 > %230 = mul nuw nsw i32 %45, %223 > %231 = add nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, 24 > %233 = zext i32 %232 to i64 > %234 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %233 > store i32 %188, i32 addrspace(3)* %234, align 4 > %235 = add nuw nsw i32 %231, 25 > %236 = zext i32 %235 to i64 > %237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %236 > store i32 %199, i32 addrspace(3)* %237, align 4 > %238 = add nuw nsw i32 %231, 26 > %239 = zext i32 %238 to i64 > %240 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %239 > store i32 %210, i32 addrspace(3)* %240, align 4 > %241 = add nuw nsw i32 %231, 27 > %242 = zext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > store i32 %221, i32 addrspace(3)* %243, align 4 > %244 = lshr i32 %7, 13 > %245 = and i32 %244, 255 > %246 = and i32 %7, 8191 > %247 = and i32 %10, 255 > %248 = mul nuw nsw i32 %246, %247 > %249 = mul nuw nsw i32 %45, %245 > %250 = add nuw nsw i32 %248, %249 > %251 = add nuw nsw i32 %250, 28 > %252 = zext i32 %251 to i64 > %253 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %252 > %254 = load i32, i32 addrspace(3)* %253, align 4 > %255 = lshr i32 %7, 13 > %256 = and i32 %255, 255 > %257 = and i32 %7, 8191 > %258 = and i32 %10, 255 > %259 = mul nuw nsw i32 %257, %258 > %260 = mul nuw nsw i32 %45, %256 > %261 = add nuw nsw i32 %259, %260 > %262 = add nuw nsw i32 %261, 29 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = load i32, i32 addrspace(3)* %264, align 4 > %266 = lshr i32 %7, 13 > %267 = and i32 %266, 255 > %268 = and i32 %7, 8191 > %269 = and i32 %10, 255 > %270 = mul nuw nsw i32 %268, %269 > %271 = mul nuw nsw i32 %45, %267 > %272 = add nuw nsw i32 %270, %271 > %273 = add nuw nsw i32 %272, 30 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = load i32, i32 addrspace(3)* %275, align 4 > %277 = lshr i32 %7, 13 > %278 = and i32 %277, 255 > %279 = and i32 %7, 8191 > %280 = and i32 
%10, 255 > %281 = mul nuw nsw i32 %279, %280 > %282 = mul nuw nsw i32 %45, %278 > %283 = add nuw nsw i32 %281, %282 > %284 = add nuw nsw i32 %283, 31 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = load i32, i32 addrspace(3)* %286, align 4 > %288 = lshr i32 %6, 13 > %289 = and i32 %288, 255 > %290 = shl i32 %5, 2 > %291 = and i32 %290, 262140 > %292 = and i32 %6, 8191 > %293 = and i32 %10, 255 > %294 = mul nuw nsw i32 %292, %293 > %295 = add nuw nsw i32 %291, %294 > %296 = mul nuw nsw i32 %45, %289 > %297 = add nuw nsw i32 %295, %296 > %298 = add nuw nsw i32 %297, 28 > %299 = zext i32 %298 to i64 > %300 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %299 > store i32 %254, i32 addrspace(3)* %300, align 4 > %301 = add nuw nsw i32 %297, 29 > %302 = zext i32 %301 to i64 > %303 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %302 > store i32 %265, i32 addrspace(3)* %303, align 4 > %304 = add nuw nsw i32 %297, 30 > %305 = zext i32 %304 to i64 > %306 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %305 > store i32 %276, i32 addrspace(3)* %306, align 4 > %307 = add nuw nsw i32 %297, 31 > %308 = zext i32 %307 to i64 > %309 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %308 > store i32 %287, i32 addrspace(3)* %309, align 4 > %310 = lshr i32 %7, 13 > %311 = and i32 %310, 255 > %312 = and i32 %7, 8191 > %313 = and i32 %10, 255 > %314 = mul nuw nsw i32 %312, %313 > %315 = mul nuw nsw i32 %45, %311 > %316 = add nuw nsw i32 %314, %315 > %317 = add nuw nsw i32 %316, 32 > %318 = zext i32 %317 to i64 > %319 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %318 > %320 = load i32, i32 addrspace(3)* %319, align 4 > %321 = lshr i32 %7, 13 > %322 = and i32 %321, 255 > %323 = and i32 %7, 8191 > %324 = and i32 %10, 255 > %325 = mul nuw nsw i32 %323, %324 > %326 = mul nuw nsw i32 %45, %322 > %327 = add nuw nsw i32 %325, %326 > %328 = add nuw nsw i32 %327, 33 > %329 = zext i32 %328 to i64 > %330 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %329 > %331 = load i32, i32 addrspace(3)* %330, align 4 > %332 = lshr i32 %7, 13 > %333 = and i32 %332, 255 > %334 = and i32 %7, 8191 > %335 = and i32 %10, 255 > %336 = mul nuw nsw i32 %334, %335 > %337 = mul nuw nsw i32 %45, %333 > %338 = add nuw nsw i32 %336, %337 > %339 = add nuw nsw i32 %338, 34 > %340 = zext i32 %339 to i64 > %341 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %340 > %342 = load i32, i32 addrspace(3)* %341, align 4 > %343 = lshr i32 %7, 13 > %344 = and i32 %343, 255 > %345 = and i32 %7, 8191 > %346 = and i32 %10, 255 > %347 = mul nuw nsw i32 %345, %346 > %348 = mul nuw nsw i32 %45, %344 > %349 = add nuw nsw i32 %347, %348 > %350 = add nuw nsw i32 %349, 35 > %351 = zext i32 %350 to i64 > %352 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %351 > %353 = load i32, i32 addrspace(3)* %352, align 4 > %354 = lshr i32 %6, 13 > %355 = and i32 %354, 255 > %356 = shl i32 %5, 2 > %357 = and i32 %356, 262140 > %358 = and i32 %6, 8191 > %359 = and i32 %10, 255 > %360 = mul nuw nsw i32 %358, %359 > %361 = add nuw nsw i32 %357, %360 > %362 = mul nuw nsw i32 %45, %355 > %363 = add nuw nsw i32 %361, %362 > %364 = add nuw nsw i32 %363, 32 > %365 = zext i32 %364 to i64 > %366 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 
0, i64 %365 > store i32 %320, i32 addrspace(3)* %366, align 4 > %367 = add nuw nsw i32 %363, 33 > %368 = zext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > store i32 %331, i32 addrspace(3)* %369, align 4 > %370 = add nuw nsw i32 %363, 34 > %371 = zext i32 %370 to i64 > %372 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %371 > store i32 %342, i32 addrspace(3)* %372, align 4 > %373 = add nuw nsw i32 %363, 35 > %374 = zext i32 %373 to i64 > %375 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %374 > store i32 %353, i32 addrspace(3)* %375, align 4 > %376 = lshr i32 %7, 13 > %377 = and i32 %376, 255 > %378 = and i32 %7, 8191 > %379 = and i32 %10, 255 > %380 = mul nuw nsw i32 %378, %379 > %381 = mul nuw nsw i32 %45, %377 > %382 = add nuw nsw i32 %380, %381 > %383 = add nuw nsw i32 %382, 36 > %384 = zext i32 %383 to i64 > %385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %384 > %386 = load i32, i32 addrspace(3)* %385, align 4 > %387 = lshr i32 %7, 13 > %388 = and i32 %387, 255 > %389 = and i32 %7, 8191 > %390 = and i32 %10, 255 > %391 = mul nuw nsw i32 %389, %390 > %392 = mul nuw nsw i32 %45, %388 > %393 = add nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 37 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = load i32, i32 addrspace(3)* %396, align 4 > %398 = lshr i32 %7, 13 > %399 = and i32 %398, 255 > %400 = and i32 %7, 8191 > %401 = and i32 %10, 255 > %402 = mul nuw nsw i32 %400, %401 > %403 = mul nuw nsw i32 %45, %399 > %404 = add nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, 38 > %406 = zext i32 %405 to i64 > %407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %406 > %408 = load i32, i32 addrspace(3)* %407, align 4 > %409 = lshr i32 %7, 13 > %410 = and i32 %409, 255 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = mul nuw nsw i32 %45, %410 > %415 = add nuw nsw i32 %413, %414 > %416 = add nuw nsw i32 %415, 39 > %417 = zext i32 %416 to i64 > %418 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %417 > %419 = load i32, i32 addrspace(3)* %418, align 4 > %420 = lshr i32 %6, 13 > %421 = and i32 %420, 255 > %422 = shl i32 %5, 2 > %423 = and i32 %422, 262140 > %424 = and i32 %6, 8191 > %425 = and i32 %10, 255 > %426 = mul nuw nsw i32 %424, %425 > %427 = add nuw nsw i32 %423, %426 > %428 = mul nuw nsw i32 %45, %421 > %429 = add nuw nsw i32 %427, %428 > %430 = add nuw nsw i32 %429, 36 > %431 = zext i32 %430 to i64 > %432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %431 > store i32 %386, i32 addrspace(3)* %432, align 4 > %433 = add nuw nsw i32 %429, 37 > %434 = zext i32 %433 to i64 > %435 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %434 > store i32 %397, i32 addrspace(3)* %435, align 4 > %436 = add nuw nsw i32 %429, 38 > %437 = zext i32 %436 to i64 > %438 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %437 > store i32 %408, i32 addrspace(3)* %438, align 4 > %439 = add nuw nsw i32 %429, 39 > %440 = zext i32 %439 to i64 > %441 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %440 > store i32 %419, i32 addrspace(3)* %441, align 4 > %442 = and i32 %7, 8191 > %443 = and i32 %10, 255 > %444 = mul nuw nsw i32 %442, %443 > 
%445 = add nuw nsw i32 %444, 16 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > %448 = bitcast i32 addrspace(3)* %447 to float addrspace(3)* > %449 = load float, float addrspace(3)* %448, align 4 > %450 = and i32 %7, 8191 > %451 = and i32 %10, 255 > %452 = mul nuw nsw i32 %450, %451 > %453 = add nuw nsw i32 %452, 17 > %454 = zext i32 %453 to i64 > %455 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %454 > %456 = bitcast i32 addrspace(3)* %455 to float addrspace(3)* > %457 = load float, float addrspace(3)* %456, align 4 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, 18 > %462 = zext i32 %461 to i64 > %463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %462 > %464 = bitcast i32 addrspace(3)* %463 to float addrspace(3)* > %465 = load float, float addrspace(3)* %464, align 4 > %466 = fmul float %23, %449 > %467 = fmul float %24, %457 > %468 = fadd float %466, %467 > %469 = fmul float %25, %465 > %470 = fadd float %468, %469 > %471 = fadd float %470, %26 > %472 = fmul float %27, %449 > %473 = fmul float %28, %457 > %474 = fadd float %472, %473 > %475 = fmul float %29, %465 > %476 = fadd float %474, %475 > %477 = fadd float %476, %30 > %478 = fmul float %31, %449 > %479 = fmul float %32, %457 > %480 = fadd float %478, %479 > %481 = fmul float %33, %465 > %482 = fadd float %480, %481 > %483 = fadd float %482, %34 > %484 = fmul float %35, %449 > %485 = fmul float %36, %457 > %486 = fadd float %484, %485 > %487 = fmul float %37, %465 > %488 = fadd float %486, %487 > %489 = fadd float %488, %38 > %490 = lshr i32 %7, 13 > %491 = and i32 %490, 255 > %492 = and i32 %7, 8191 > %493 = and i32 %10, 255 > %494 = mul nuw nsw i32 %492, %493 > %495 = add nuw nsw i32 %494, %491 > %496 = add nuw nsw i32 %495, 16 > %497 = zext i32 %496 to i64 > %498 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %497 > %499 = bitcast i32 addrspace(3)* %498 to float addrspace(3)* > %500 = load float, float addrspace(3)* %499, align 4 > %501 = lshr i32 %7, 13 > %502 = and i32 %501, 255 > %503 = and i32 %7, 8191 > %504 = and i32 %10, 255 > %505 = mul nuw nsw i32 %503, %504 > %506 = add nuw nsw i32 %505, %502 > %507 = add nuw nsw i32 %506, 17 > %508 = zext i32 %507 to i64 > %509 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %508 > %510 = bitcast i32 addrspace(3)* %509 to float addrspace(3)* > %511 = load float, float addrspace(3)* %510, align 4 > %512 = lshr i32 %7, 13 > %513 = and i32 %512, 255 > %514 = and i32 %7, 8191 > %515 = and i32 %10, 255 > %516 = mul nuw nsw i32 %514, %515 > %517 = add nuw nsw i32 %516, %513 > %518 = add nuw nsw i32 %517, 18 > %519 = zext i32 %518 to i64 > %520 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %519 > %521 = bitcast i32 addrspace(3)* %520 to float addrspace(3)* > %522 = load float, float addrspace(3)* %521, align 4 > %523 = fmul float %23, %500 > %524 = fmul float %24, %511 > %525 = fadd float %523, %524 > %526 = fmul float %25, %522 > %527 = fadd float %525, %526 > %528 = fadd float %527, %26 > %529 = fmul float %27, %500 > %530 = fmul float %28, %511 > %531 = fadd float %529, %530 > %532 = fmul float %29, %522 > %533 = fadd float %531, %532 > %534 = fadd float %533, %30 > %535 = fmul float %31, %500 > %536 = fmul float %32, %511 > %537 = fadd float %535, %536 > %538 = fmul float %33, %522 > 
%539 = fadd float %537, %538 > %540 = fadd float %539, %34 > %541 = fmul float %35, %500 > %542 = fmul float %36, %511 > %543 = fadd float %541, %542 > %544 = fmul float %37, %522 > %545 = fadd float %543, %544 > %546 = fadd float %545, %38 > %547 = and i32 %7, 8191 > %548 = and i32 %10, 255 > %549 = mul nuw nsw i32 %547, %548 > %550 = lshr i32 %7, 12 > %551 = and i32 %550, 510 > %552 = add nuw nsw i32 %549, %551 > %553 = add nuw nsw i32 %552, 16 > %554 = zext i32 %553 to i64 > %555 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %554 > %556 = bitcast i32 addrspace(3)* %555 to float addrspace(3)* > %557 = load float, float addrspace(3)* %556, align 4 > %558 = and i32 %7, 8191 > %559 = and i32 %10, 255 > %560 = mul nuw nsw i32 %558, %559 > %561 = lshr i32 %7, 12 > %562 = and i32 %561, 510 > %563 = add nuw nsw i32 %560, %562 > %564 = add nuw nsw i32 %563, 17 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fmul float %23, %557 > %581 = fmul float %24, %568 > %582 = fadd float %580, %581 > %583 = fmul float %25, %579 > %584 = fadd float %582, %583 > %585 = fadd float %584, %26 > %586 = fmul float %27, %557 > %587 = fmul float %28, %568 > %588 = fadd float %586, %587 > %589 = fmul float %29, %579 > %590 = fadd float %588, %589 > %591 = fadd float %590, %30 > %592 = fmul float %31, %557 > %593 = fmul float %32, %568 > %594 = fadd float %592, %593 > %595 = fmul float %33, %579 > %596 = fadd float %594, %595 > %597 = fadd float %596, %34 > %598 = fmul float %35, %557 > %599 = fmul float %36, %568 > %600 = fadd float %598, %599 > %601 = fmul float %37, %579 > %602 = fadd float %600, %601 > %603 = fadd float %602, %38 > %604 = fadd float %483, 1.000000e+02 > %605 = fadd float %540, 1.000000e+02 > %606 = fadd float %597, 1.000000e+02 > %607 = call float @llvm.fabs.f32(float %489) > %608 = call float @llvm.minnum.f32(float %607, float 1.000000e+02) > %609 = fcmp ogt float %471, 0.000000e+00 > %610 = fcmp ogt float %477, 0.000000e+00 > %611 = fcmp olt float %471, 0.000000e+00 > %612 = fcmp olt float %477, 0.000000e+00 > %613 = sext i1 %611 to i32 > %614 = sext i1 %612 to i32 > %615 = zext i1 %609 to i32 > %616 = zext i1 %610 to i32 > %617 = add nsw i32 %615, %613 > %618 = add nsw i32 %616, %614 > %619 = sitofp i32 %617 to float > %620 = sitofp i32 %618 to float > %621 = fsub float -0.000000e+00, %608 > %622 = call float @llvm.fma.f32(float %621, float %619, float %471) > %623 = fsub float -0.000000e+00, %608 > %624 = call float @llvm.fma.f32(float %623, float %620, float %477) > %625 = call float @llvm.fabs.f32(float %546) > %626 = call float @llvm.minnum.f32(float %625, float 1.000000e+02) > %627 = fcmp ogt float %528, 0.000000e+00 > %628 = fcmp ogt float %534, 0.000000e+00 > %629 = fcmp olt float %528, 0.000000e+00 > %630 = fcmp olt float %534, 0.000000e+00 > %631 = sext i1 %629 to i32 > %632 = sext i1 %630 to i32 > %633 = zext i1 
%627 to i32 > %634 = zext i1 %628 to i32 > %635 = add nsw i32 %633, %631 > %636 = add nsw i32 %634, %632 > %637 = sitofp i32 %635 to float > %638 = sitofp i32 %636 to float > %639 = fsub float -0.000000e+00, %626 > %640 = call float @llvm.fma.f32(float %639, float %637, float %528) > %641 = fsub float -0.000000e+00, %626 > %642 = call float @llvm.fma.f32(float %641, float %638, float %534) > %643 = fcmp ogt float %585, 0.000000e+00 > %644 = fcmp ogt float %591, 0.000000e+00 > %645 = fcmp olt float %585, 0.000000e+00 > %646 = fcmp olt float %591, 0.000000e+00 > %647 = sext i1 %645 to i32 > %648 = sext i1 %646 to i32 > %649 = zext i1 %643 to i32 > %650 = zext i1 %644 to i32 > %651 = add nsw i32 %649, %647 > %652 = add nsw i32 %650, %648 > %653 = sitofp i32 %651 to float > %654 = sitofp i32 %652 to float > %655 = call float @llvm.fabs.f32(float %603) > %656 = call float @llvm.minnum.f32(float %655, float 1.000000e+02) > %657 = fsub float -0.000000e+00, %656 > %658 = call float @llvm.fma.f32(float %657, float %653, float %585) > %659 = fsub float -0.000000e+00, %656 > %660 = call float @llvm.fma.f32(float %659, float %654, float %591) > %661 = fsub float -0.000000e+00, %489 > %662 = fcmp olt float %622, %661 > %663 = fsub float -0.000000e+00, %489 > %664 = fcmp olt float %624, %663 > %665 = zext i1 %662 to i32 > %666 = zext i1 %664 to i32 > %667 = fsub float -0.000000e+00, %546 > %668 = fcmp olt float %640, %667 > %669 = fsub float -0.000000e+00, %546 > %670 = fcmp olt float %642, %669 > %671 = zext i1 %668 to i32 > %672 = zext i1 %670 to i32 > %673 = add nuw nsw i32 %671, %665 > %674 = add nuw nsw i32 %672, %666 > %675 = fsub float -0.000000e+00, %603 > %676 = fcmp olt float %658, %675 > %677 = fsub float -0.000000e+00, %603 > %678 = fcmp olt float %660, %677 > %679 = zext i1 %676 to i32 > %680 = zext i1 %678 to i32 > %681 = add nuw nsw i32 %679, %673 > %682 = add nuw nsw i32 %680, %674 > %683 = fcmp olt float %604, 0.000000e+00 > %684 = zext i1 %683 to i32 > %685 = bitcast i32 %684 to float > %686 = fcmp olt float %605, 0.000000e+00 > %687 = fcmp olt float %606, 0.000000e+00 > %688 = zext i1 %686 to i32 > %689 = zext i1 %687 to i32 > %690 = add nuw nsw i32 %688, %684 > %691 = add nuw nsw i32 %689, %690 > %692 = fcmp olt float %489, %622 > %693 = fcmp olt float %489, %624 > %694 = zext i1 %692 to i32 > %695 = zext i1 %693 to i32 > %696 = fcmp olt float %546, %640 > %697 = fcmp olt float %546, %642 > %698 = zext i1 %696 to i32 > %699 = zext i1 %697 to i32 > %700 = add nuw nsw i32 %694, %698 > %701 = add nuw nsw i32 %695, %699 > %702 = fcmp olt float %603, %658 > %703 = fcmp olt float %603, %660 > %704 = zext i1 %702 to i32 > %705 = zext i1 %703 to i32 > %706 = add nuw nsw i32 %700, %704 > %707 = add nuw nsw i32 %701, %705 > %708 = icmp eq i32 %681, 3 > %709 = icmp eq i32 %682, 3 > %710 = sext i1 %708 to i32 > %711 = sext i1 %709 to i32 > %712 = bitcast i32 %711 to float > %713 = icmp eq i32 %706, 3 > %714 = icmp eq i32 %707, 3 > %715 = sext i1 %714 to i32 > %716 = bitcast i32 %715 to float > %717 = bitcast i32 %711 to float > %718 = select i1 %714, float 0xFFFFFFFFE0000000, float %717 > %719 = bitcast float %718 to i32 > %720 = select i1 %713, i32 -1, i32 %710 > %721 = or i32 %719, %720 > %722 = icmp eq i32 %721, 0 > %not. = icmp ne i32 %691, 3 > %723 = and i1 %722, %not. 
> br i1 %723, label %IF, label %ENDIF > >IF: ; preds = %main_body > %724 = lshr i32 %7, 13 > %725 = and i32 %724, 255 > %726 = and i32 %7, 8191 > %727 = and i32 %10, 255 > %728 = mul nuw nsw i32 %726, %727 > %729 = add nuw nsw i32 %728, %725 > %730 = add nuw nsw i32 %729, 16 > %731 = zext i32 %730 to i64 > %732 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %731 > %733 = bitcast i32 addrspace(3)* %732 to float addrspace(3)* > %734 = load float, float addrspace(3)* %733, align 4 > %735 = and i32 %7, 8191 > %736 = and i32 %10, 255 > %737 = mul nuw nsw i32 %735, %736 > %738 = add nuw nsw i32 %737, 16 > %739 = zext i32 %738 to i64 > %740 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %739 > %741 = bitcast i32 addrspace(3)* %740 to float addrspace(3)* > %742 = load float, float addrspace(3)* %741, align 4 > %743 = fsub float %742, %734 > %744 = lshr i32 %7, 13 > %745 = and i32 %744, 255 > %746 = and i32 %7, 8191 > %747 = and i32 %10, 255 > %748 = mul nuw nsw i32 %746, %747 > %749 = add nuw nsw i32 %748, %745 > %750 = add nuw nsw i32 %749, 17 > %751 = zext i32 %750 to i64 > %752 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %751 > %753 = bitcast i32 addrspace(3)* %752 to float addrspace(3)* > %754 = load float, float addrspace(3)* %753, align 4 > %755 = and i32 %7, 8191 > %756 = and i32 %10, 255 > %757 = mul nuw nsw i32 %755, %756 > %758 = add nuw nsw i32 %757, 17 > %759 = zext i32 %758 to i64 > %760 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %759 > %761 = bitcast i32 addrspace(3)* %760 to float addrspace(3)* > %762 = load float, float addrspace(3)* %761, align 4 > %763 = fsub float %762, %754 > %764 = lshr i32 %7, 13 > %765 = and i32 %764, 255 > %766 = and i32 %7, 8191 > %767 = and i32 %10, 255 > %768 = mul nuw nsw i32 %766, %767 > %769 = add nuw nsw i32 %768, %765 > %770 = add nuw nsw i32 %769, 18 > %771 = zext i32 %770 to i64 > %772 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %771 > %773 = bitcast i32 addrspace(3)* %772 to float addrspace(3)* > %774 = load float, float addrspace(3)* %773, align 4 > %775 = and i32 %7, 8191 > %776 = and i32 %10, 255 > %777 = mul nuw nsw i32 %775, %776 > %778 = add nuw nsw i32 %777, 18 > %779 = zext i32 %778 to i64 > %780 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %779 > %781 = bitcast i32 addrspace(3)* %780 to float addrspace(3)* > %782 = load float, float addrspace(3)* %781, align 4 > %783 = fsub float %782, %774 > %784 = fmul float %743, %743 > %785 = fmul float %763, %763 > %786 = fadd float %785, %784 > %787 = fmul float %783, %783 > %788 = fadd float %786, %787 > %789 = and i32 %7, 8191 > %790 = and i32 %10, 255 > %791 = mul nuw nsw i32 %789, %790 > %792 = lshr i32 %7, 12 > %793 = and i32 %792, 510 > %794 = add nuw nsw i32 %791, %793 > %795 = add nuw nsw i32 %794, 16 > %796 = zext i32 %795 to i64 > %797 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %796 > %798 = bitcast i32 addrspace(3)* %797 to float addrspace(3)* > %799 = load float, float addrspace(3)* %798, align 4 > %800 = lshr i32 %7, 13 > %801 = and i32 %800, 255 > %802 = and i32 %7, 8191 > %803 = and i32 %10, 255 > %804 = mul nuw nsw i32 %802, %803 > %805 = add nuw nsw i32 %804, %801 > %806 = add nuw nsw i32 %805, 16 > %807 = zext i32 %806 to i64 > %808 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %807 > %809 = bitcast i32 
addrspace(3)* %808 to float addrspace(3)* > %810 = load float, float addrspace(3)* %809, align 4 > %811 = fsub float %810, %799 > %812 = and i32 %7, 8191 > %813 = and i32 %10, 255 > %814 = mul nuw nsw i32 %812, %813 > %815 = lshr i32 %7, 12 > %816 = and i32 %815, 510 > %817 = add nuw nsw i32 %814, %816 > %818 = add nuw nsw i32 %817, 17 > %819 = zext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = lshr i32 %7, 13 > %824 = and i32 %823, 255 > %825 = and i32 %7, 8191 > %826 = and i32 %10, 255 > %827 = mul nuw nsw i32 %825, %826 > %828 = add nuw nsw i32 %827, %824 > %829 = add nuw nsw i32 %828, 17 > %830 = zext i32 %829 to i64 > %831 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %830 > %832 = bitcast i32 addrspace(3)* %831 to float addrspace(3)* > %833 = load float, float addrspace(3)* %832, align 4 > %834 = fsub float %833, %822 > %835 = and i32 %7, 8191 > %836 = and i32 %10, 255 > %837 = mul nuw nsw i32 %835, %836 > %838 = lshr i32 %7, 12 > %839 = and i32 %838, 510 > %840 = add nuw nsw i32 %837, %839 > %841 = add nuw nsw i32 %840, 18 > %842 = zext i32 %841 to i64 > %843 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %842 > %844 = bitcast i32 addrspace(3)* %843 to float addrspace(3)* > %845 = load float, float addrspace(3)* %844, align 4 > %846 = lshr i32 %7, 13 > %847 = and i32 %846, 255 > %848 = and i32 %7, 8191 > %849 = and i32 %10, 255 > %850 = mul nuw nsw i32 %848, %849 > %851 = add nuw nsw i32 %850, %847 > %852 = add nuw nsw i32 %851, 18 > %853 = zext i32 %852 to i64 > %854 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %853 > %855 = bitcast i32 addrspace(3)* %854 to float addrspace(3)* > %856 = load float, float addrspace(3)* %855, align 4 > %857 = fsub float %856, %845 > %858 = fmul float %811, %811 > %859 = fmul float %834, %834 > %860 = fadd float %859, %858 > %861 = fmul float %857, %857 > %862 = fadd float %860, %861 > %863 = and i32 %7, 8191 > %864 = and i32 %10, 255 > %865 = mul nuw nsw i32 %863, %864 > %866 = add nuw nsw i32 %865, 16 > %867 = zext i32 %866 to i64 > %868 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %867 > %869 = bitcast i32 addrspace(3)* %868 to float addrspace(3)* > %870 = load float, float addrspace(3)* %869, align 4 > %871 = and i32 %7, 8191 > %872 = and i32 %10, 255 > %873 = mul nuw nsw i32 %871, %872 > %874 = lshr i32 %7, 12 > %875 = and i32 %874, 510 > %876 = add nuw nsw i32 %873, %875 > %877 = add nuw nsw i32 %876, 16 > %878 = zext i32 %877 to i64 > %879 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %878 > %880 = bitcast i32 addrspace(3)* %879 to float addrspace(3)* > %881 = load float, float addrspace(3)* %880, align 4 > %882 = fsub float %881, %870 > %883 = and i32 %7, 8191 > %884 = and i32 %10, 255 > %885 = mul nuw nsw i32 %883, %884 > %886 = add nuw nsw i32 %885, 17 > %887 = zext i32 %886 to i64 > %888 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %887 > %889 = bitcast i32 addrspace(3)* %888 to float addrspace(3)* > %890 = load float, float addrspace(3)* %889, align 4 > %891 = and i32 %7, 8191 > %892 = and i32 %10, 255 > %893 = mul nuw nsw i32 %891, %892 > %894 = lshr i32 %7, 12 > %895 = and i32 %894, 510 > %896 = add nuw nsw i32 %893, %895 > %897 = add nuw nsw i32 %896, 17 > 
%898 = zext i32 %897 to i64 > %899 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %898 > %900 = bitcast i32 addrspace(3)* %899 to float addrspace(3)* > %901 = load float, float addrspace(3)* %900, align 4 > %902 = fsub float %901, %890 > %903 = and i32 %7, 8191 > %904 = and i32 %10, 255 > %905 = mul nuw nsw i32 %903, %904 > %906 = add nuw nsw i32 %905, 18 > %907 = zext i32 %906 to i64 > %908 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %907 > %909 = bitcast i32 addrspace(3)* %908 to float addrspace(3)* > %910 = load float, float addrspace(3)* %909, align 4 > %911 = and i32 %7, 8191 > %912 = and i32 %10, 255 > %913 = mul nuw nsw i32 %911, %912 > %914 = lshr i32 %7, 12 > %915 = and i32 %914, 510 > %916 = add nuw nsw i32 %913, %915 > %917 = add nuw nsw i32 %916, 18 > %918 = zext i32 %917 to i64 > %919 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %918 > %920 = bitcast i32 addrspace(3)* %919 to float addrspace(3)* > %921 = load float, float addrspace(3)* %920, align 4 > %922 = fsub float %921, %910 > %923 = fmul float %882, %882 > %924 = fmul float %902, %902 > %925 = fadd float %924, %923 > %926 = fmul float %922, %922 > %927 = fadd float %925, %926 > %928 = call float @llvm.sqrt.f32(float %788) > %929 = call float @llvm.sqrt.f32(float %862) > %930 = call float @llvm.sqrt.f32(float %927) > %931 = lshr i32 %7, 13 > %932 = and i32 %931, 255 > %933 = and i32 %7, 8191 > %934 = and i32 %10, 255 > %935 = mul nuw nsw i32 %933, %934 > %936 = add nuw nsw i32 %935, %932 > %937 = add nuw nsw i32 %936, 16 > %938 = zext i32 %937 to i64 > %939 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %938 > %940 = bitcast i32 addrspace(3)* %939 to float addrspace(3)* > %941 = load float, float addrspace(3)* %940, align 4 > %942 = and i32 %7, 8191 > %943 = and i32 %10, 255 > %944 = mul nuw nsw i32 %942, %943 > %945 = add nuw nsw i32 %944, 16 > %946 = zext i32 %945 to i64 > %947 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %946 > %948 = bitcast i32 addrspace(3)* %947 to float addrspace(3)* > %949 = load float, float addrspace(3)* %948, align 4 > %950 = fadd float %941, %949 > %951 = lshr i32 %7, 13 > %952 = and i32 %951, 255 > %953 = and i32 %7, 8191 > %954 = and i32 %10, 255 > %955 = mul nuw nsw i32 %953, %954 > %956 = add nuw nsw i32 %955, %952 > %957 = add nuw nsw i32 %956, 17 > %958 = zext i32 %957 to i64 > %959 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %958 > %960 = bitcast i32 addrspace(3)* %959 to float addrspace(3)* > %961 = load float, float addrspace(3)* %960, align 4 > %962 = and i32 %7, 8191 > %963 = and i32 %10, 255 > %964 = mul nuw nsw i32 %962, %963 > %965 = add nuw nsw i32 %964, 17 > %966 = zext i32 %965 to i64 > %967 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %966 > %968 = bitcast i32 addrspace(3)* %967 to float addrspace(3)* > %969 = load float, float addrspace(3)* %968, align 4 > %970 = fadd float %961, %969 > %971 = lshr i32 %7, 13 > %972 = and i32 %971, 255 > %973 = and i32 %7, 8191 > %974 = and i32 %10, 255 > %975 = mul nuw nsw i32 %973, %974 > %976 = add nuw nsw i32 %975, %972 > %977 = add nuw nsw i32 %976, 18 > %978 = zext i32 %977 to i64 > %979 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %978 > %980 = bitcast i32 addrspace(3)* %979 to float addrspace(3)* > %981 = load float, float addrspace(3)* %980, align 4 > %982 = and i32 
%7, 8191 > %983 = and i32 %10, 255 > %984 = mul nuw nsw i32 %982, %983 > %985 = add nuw nsw i32 %984, 18 > %986 = zext i32 %985 to i64 > %987 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %986 > %988 = bitcast i32 addrspace(3)* %987 to float addrspace(3)* > %989 = load float, float addrspace(3)* %988, align 4 > %990 = fadd float %981, %989 > %991 = fmul float %950, 5.000000e-01 > %992 = fmul float %970, 5.000000e-01 > %993 = fmul float %990, 5.000000e-01 > %994 = and i32 %7, 8191 > %995 = and i32 %10, 255 > %996 = mul nuw nsw i32 %994, %995 > %997 = lshr i32 %7, 12 > %998 = and i32 %997, 510 > %999 = add nuw nsw i32 %996, %998 > %1000 = add nuw nsw i32 %999, 16 > %1001 = zext i32 %1000 to i64 > %1002 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1001 > %1003 = bitcast i32 addrspace(3)* %1002 to float addrspace(3)* > %1004 = load float, float addrspace(3)* %1003, align 4 > %1005 = lshr i32 %7, 13 > %1006 = and i32 %1005, 255 > %1007 = and i32 %7, 8191 > %1008 = and i32 %10, 255 > %1009 = mul nuw nsw i32 %1007, %1008 > %1010 = add nuw nsw i32 %1009, %1006 > %1011 = add nuw nsw i32 %1010, 16 > %1012 = zext i32 %1011 to i64 > %1013 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1012 > %1014 = bitcast i32 addrspace(3)* %1013 to float addrspace(3)* > %1015 = load float, float addrspace(3)* %1014, align 4 > %1016 = fadd float %1004, %1015 > %1017 = and i32 %7, 8191 > %1018 = and i32 %10, 255 > %1019 = mul nuw nsw i32 %1017, %1018 > %1020 = lshr i32 %7, 12 > %1021 = and i32 %1020, 510 > %1022 = add nuw nsw i32 %1019, %1021 > %1023 = add nuw nsw i32 %1022, 17 > %1024 = zext i32 %1023 to i64 > %1025 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1024 > %1026 = bitcast i32 addrspace(3)* %1025 to float addrspace(3)* > %1027 = load float, float addrspace(3)* %1026, align 4 > %1028 = lshr i32 %7, 13 > %1029 = and i32 %1028, 255 > %1030 = and i32 %7, 8191 > %1031 = and i32 %10, 255 > %1032 = mul nuw nsw i32 %1030, %1031 > %1033 = add nuw nsw i32 %1032, %1029 > %1034 = add nuw nsw i32 %1033, 17 > %1035 = zext i32 %1034 to i64 > %1036 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1035 > %1037 = bitcast i32 addrspace(3)* %1036 to float addrspace(3)* > %1038 = load float, float addrspace(3)* %1037, align 4 > %1039 = fadd float %1027, %1038 > %1040 = and i32 %7, 8191 > %1041 = and i32 %10, 255 > %1042 = mul nuw nsw i32 %1040, %1041 > %1043 = lshr i32 %7, 12 > %1044 = and i32 %1043, 510 > %1045 = add nuw nsw i32 %1042, %1044 > %1046 = add nuw nsw i32 %1045, 18 > %1047 = zext i32 %1046 to i64 > %1048 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1047 > %1049 = bitcast i32 addrspace(3)* %1048 to float addrspace(3)* > %1050 = load float, float addrspace(3)* %1049, align 4 > %1051 = lshr i32 %7, 13 > %1052 = and i32 %1051, 255 > %1053 = and i32 %7, 8191 > %1054 = and i32 %10, 255 > %1055 = mul nuw nsw i32 %1053, %1054 > %1056 = add nuw nsw i32 %1055, %1052 > %1057 = add nuw nsw i32 %1056, 18 > %1058 = zext i32 %1057 to i64 > %1059 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1058 > %1060 = bitcast i32 addrspace(3)* %1059 to float addrspace(3)* > %1061 = load float, float addrspace(3)* %1060, align 4 > %1062 = fadd float %1050, %1061 > %1063 = fmul float %1016, 5.000000e-01 > %1064 = fmul float %1039, 5.000000e-01 > %1065 = fmul float %1062, 5.000000e-01 > %1066 = and i32 %7, 8191 > 
%1067 = and i32 %10, 255 > %1068 = mul nuw nsw i32 %1066, %1067 > %1069 = add nuw nsw i32 %1068, 16 > %1070 = zext i32 %1069 to i64 > %1071 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1070 > %1072 = bitcast i32 addrspace(3)* %1071 to float addrspace(3)* > %1073 = load float, float addrspace(3)* %1072, align 4 > %1074 = and i32 %7, 8191 > %1075 = and i32 %10, 255 > %1076 = mul nuw nsw i32 %1074, %1075 > %1077 = lshr i32 %7, 12 > %1078 = and i32 %1077, 510 > %1079 = add nuw nsw i32 %1076, %1078 > %1080 = add nuw nsw i32 %1079, 16 > %1081 = zext i32 %1080 to i64 > %1082 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1081 > %1083 = bitcast i32 addrspace(3)* %1082 to float addrspace(3)* > %1084 = load float, float addrspace(3)* %1083, align 4 > %1085 = fadd float %1073, %1084 > %1086 = and i32 %7, 8191 > %1087 = and i32 %10, 255 > %1088 = mul nuw nsw i32 %1086, %1087 > %1089 = add nuw nsw i32 %1088, 17 > %1090 = zext i32 %1089 to i64 > %1091 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1090 > %1092 = bitcast i32 addrspace(3)* %1091 to float addrspace(3)* > %1093 = load float, float addrspace(3)* %1092, align 4 > %1094 = and i32 %7, 8191 > %1095 = and i32 %10, 255 > %1096 = mul nuw nsw i32 %1094, %1095 > %1097 = lshr i32 %7, 12 > %1098 = and i32 %1097, 510 > %1099 = add nuw nsw i32 %1096, %1098 > %1100 = add nuw nsw i32 %1099, 17 > %1101 = zext i32 %1100 to i64 > %1102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1101 > %1103 = bitcast i32 addrspace(3)* %1102 to float addrspace(3)* > %1104 = load float, float addrspace(3)* %1103, align 4 > %1105 = fadd float %1093, %1104 > %1106 = and i32 %7, 8191 > %1107 = and i32 %10, 255 > %1108 = mul nuw nsw i32 %1106, %1107 > %1109 = add nuw nsw i32 %1108, 18 > %1110 = zext i32 %1109 to i64 > %1111 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1110 > %1112 = bitcast i32 addrspace(3)* %1111 to float addrspace(3)* > %1113 = load float, float addrspace(3)* %1112, align 4 > %1114 = and i32 %7, 8191 > %1115 = and i32 %10, 255 > %1116 = mul nuw nsw i32 %1114, %1115 > %1117 = lshr i32 %7, 12 > %1118 = and i32 %1117, 510 > %1119 = add nuw nsw i32 %1116, %1118 > %1120 = add nuw nsw i32 %1119, 18 > %1121 = zext i32 %1120 to i64 > %1122 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1121 > %1123 = bitcast i32 addrspace(3)* %1122 to float addrspace(3)* > %1124 = load float, float addrspace(3)* %1123, align 4 > %1125 = fadd float %1113, %1124 > %1126 = fmul float %1085, 5.000000e-01 > %1127 = fmul float %1105, 5.000000e-01 > %1128 = fmul float %1125, 5.000000e-01 > %1129 = call float @llvm.fma.f32(float %39, float %928, float %991) > %1130 = call float @llvm.fma.f32(float %40, float %928, float %992) > %1131 = call float @llvm.fma.f32(float %41, float %928, float %993) > %1132 = call float @llvm.fma.f32(float %39, float %929, float %1063) > %1133 = call float @llvm.fma.f32(float %40, float %929, float %1064) > %1134 = call float @llvm.fma.f32(float %41, float %929, float %1065) > %1135 = call float @llvm.fma.f32(float %39, float %930, float %1126) > %1136 = call float @llvm.fma.f32(float %40, float %930, float %1127) > %1137 = call float @llvm.fma.f32(float %41, float %930, float %1128) > %1138 = fmul float %23, %991 > %1139 = fmul float %24, %992 > %1140 = fadd float %1138, %1139 > %1141 = fmul float %25, %993 > %1142 = fadd float %1140, %1141 > %1143 = fadd float 
%1142, %26 > %1144 = fmul float %27, %991 > %1145 = fmul float %28, %992 > %1146 = fadd float %1144, %1145 > %1147 = fmul float %29, %993 > %1148 = fadd float %1146, %1147 > %1149 = fadd float %1148, %30 > %1150 = fmul float %35, %991 > %1151 = fmul float %36, %992 > %1152 = fadd float %1150, %1151 > %1153 = fmul float %37, %993 > %1154 = fadd float %1152, %1153 > %1155 = fadd float %1154, %38 > %1156 = fmul float %23, %1063 > %1157 = fmul float %24, %1064 > %1158 = fadd float %1156, %1157 > %1159 = fmul float %25, %1065 > %1160 = fadd float %1158, %1159 > %1161 = fadd float %1160, %26 > %1162 = fmul float %27, %1063 > %1163 = fmul float %28, %1064 > %1164 = fadd float %1162, %1163 > %1165 = fmul float %29, %1065 > %1166 = fadd float %1164, %1165 > %1167 = fadd float %1166, %30 > %1168 = fmul float %35, %1063 > %1169 = fmul float %36, %1064 > %1170 = fadd float %1168, %1169 > %1171 = fmul float %37, %1065 > %1172 = fadd float %1170, %1171 > %1173 = fadd float %1172, %38 > %1174 = fmul float %23, %1126 > %1175 = fmul float %24, %1127 > %1176 = fadd float %1174, %1175 > %1177 = fmul float %25, %1128 > %1178 = fadd float %1176, %1177 > %1179 = fadd float %1178, %26 > %1180 = fmul float %27, %1126 > %1181 = fmul float %28, %1127 > %1182 = fadd float %1180, %1181 > %1183 = fmul float %29, %1128 > %1184 = fadd float %1182, %1183 > %1185 = fadd float %1184, %30 > %1186 = fmul float %35, %1126 > %1187 = fmul float %36, %1127 > %1188 = fadd float %1186, %1187 > %1189 = fmul float %37, %1128 > %1190 = fadd float %1188, %1189 > %1191 = fadd float %1190, %38 > %1192 = fmul float %23, %1129 > %1193 = fmul float %24, %1130 > %1194 = fadd float %1192, %1193 > %1195 = fmul float %25, %1131 > %1196 = fadd float %1194, %1195 > %1197 = fadd float %1196, %26 > %1198 = fmul float %27, %1129 > %1199 = fmul float %28, %1130 > %1200 = fadd float %1198, %1199 > %1201 = fmul float %29, %1131 > %1202 = fadd float %1200, %1201 > %1203 = fadd float %1202, %30 > %1204 = fmul float %35, %1129 > %1205 = fmul float %36, %1130 > %1206 = fadd float %1204, %1205 > %1207 = fmul float %37, %1131 > %1208 = fadd float %1206, %1207 > %1209 = fadd float %1208, %38 > %1210 = fmul float %23, %1132 > %1211 = fmul float %24, %1133 > %1212 = fadd float %1210, %1211 > %1213 = fmul float %25, %1134 > %1214 = fadd float %1212, %1213 > %1215 = fadd float %1214, %26 > %1216 = fmul float %27, %1132 > %1217 = fmul float %28, %1133 > %1218 = fadd float %1216, %1217 > %1219 = fmul float %29, %1134 > %1220 = fadd float %1218, %1219 > %1221 = fadd float %1220, %30 > %1222 = fmul float %35, %1132 > %1223 = fmul float %36, %1133 > %1224 = fadd float %1222, %1223 > %1225 = fmul float %37, %1134 > %1226 = fadd float %1224, %1225 > %1227 = fadd float %1226, %38 > %1228 = fmul float %23, %1135 > %1229 = fmul float %24, %1136 > %1230 = fadd float %1228, %1229 > %1231 = fmul float %25, %1137 > %1232 = fadd float %1230, %1231 > %1233 = fadd float %1232, %26 > %1234 = fmul float %27, %1135 > %1235 = fmul float %28, %1136 > %1236 = fadd float %1234, %1235 > %1237 = fmul float %29, %1137 > %1238 = fadd float %1236, %1237 > %1239 = fadd float %1238, %30 > %1240 = fmul float %35, %1135 > %1241 = fmul float %36, %1136 > %1242 = fadd float %1240, %1241 > %1243 = fmul float %37, %1137 > %1244 = fadd float %1242, %1243 > %1245 = fadd float %1244, %38 > %1246 = fcmp oeq float %1173, 0.000000e+00 > %1247 = fcmp oeq float %1173, 0.000000e+00 > %1248 = fcmp ogt float %1161, 0.000000e+00 > %1249 = select i1 %1248, float 1.000000e+00, float %1161 > %1250 = fcmp oge 
float %1249, 0.000000e+00 > %1251 = fcmp ogt float %1167, 0.000000e+00 > %1252 = select i1 %1251, float 1.000000e+00, float %1167 > %1253 = fcmp oge float %1252, 0.000000e+00 > %.op = fmul float %1249, 0x4600000000000000 > %1254 = select i1 %1250, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1252, 0x4600000000000000 > %1255 = select i1 %1253, float %.op80, float 0xC600000000000000 > %1256 = fdiv float 1.000000e+00, %1173 > %1257 = fmul float %1161, %1256 > %1258 = fmul float %1167, %1256 > %1259 = select i1 %1246, float %1254, float %1257 > %1260 = select i1 %1247, float %1255, float %1258 > %1261 = fcmp oeq float %1191, 0.000000e+00 > %1262 = fcmp oeq float %1191, 0.000000e+00 > %1263 = fcmp ogt float %1179, 0.000000e+00 > %1264 = select i1 %1263, float 1.000000e+00, float %1179 > %1265 = fcmp oge float %1264, 0.000000e+00 > %1266 = fcmp ogt float %1185, 0.000000e+00 > %1267 = select i1 %1266, float 1.000000e+00, float %1185 > %1268 = fcmp oge float %1267, 0.000000e+00 > %.op81 = fmul float %1264, 0x4600000000000000 > %1269 = select i1 %1265, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1267, 0x4600000000000000 > %1270 = select i1 %1268, float %.op82, float 0xC600000000000000 > %1271 = fdiv float 1.000000e+00, %1191 > %1272 = fmul float %1179, %1271 > %1273 = fmul float %1185, %1271 > %1274 = select i1 %1261, float %1269, float %1272 > %1275 = select i1 %1262, float %1270, float %1273 > %1276 = fcmp oeq float %1209, 0.000000e+00 > %1277 = fcmp oeq float %1209, 0.000000e+00 > %1278 = fcmp ogt float %1197, 0.000000e+00 > %1279 = select i1 %1278, float 1.000000e+00, float %1197 > %1280 = fcmp oge float %1279, 0.000000e+00 > %1281 = fcmp ogt float %1203, 0.000000e+00 > %1282 = select i1 %1281, float 1.000000e+00, float %1203 > %1283 = fcmp oge float %1282, 0.000000e+00 > %.op83 = fmul float %1279, 0x4600000000000000 > %1284 = select i1 %1280, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1282, 0x4600000000000000 > %1285 = select i1 %1283, float %.op84, float 0xC600000000000000 > %1286 = fdiv float 1.000000e+00, %1209 > %1287 = fmul float %1197, %1286 > %1288 = fmul float %1203, %1286 > %1289 = select i1 %1276, float %1284, float %1287 > %1290 = select i1 %1277, float %1285, float %1288 > %1291 = fcmp oeq float %1155, 0.000000e+00 > %1292 = fcmp oeq float %1155, 0.000000e+00 > %1293 = fcmp ogt float %1143, 0.000000e+00 > %1294 = select i1 %1293, float 1.000000e+00, float %1143 > %1295 = fcmp oge float %1294, 0.000000e+00 > %1296 = fcmp ogt float %1149, 0.000000e+00 > %1297 = select i1 %1296, float 1.000000e+00, float %1149 > %1298 = fcmp oge float %1297, 0.000000e+00 > %.op85 = fmul float %1294, 0x4600000000000000 > %1299 = select i1 %1295, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1297, 0x4600000000000000 > %1300 = select i1 %1298, float %.op86, float 0xC600000000000000 > %1301 = fdiv float 1.000000e+00, %1155 > %1302 = fmul float %1143, %1301 > %1303 = fmul float %1149, %1301 > %1304 = select i1 %1291, float %1299, float %1302 > %1305 = select i1 %1292, float %1300, float %1303 > %1306 = fsub float %1304, %1289 > %1307 = fsub float %1305, %1290 > %1308 = fcmp oeq float %1227, 0.000000e+00 > %1309 = fcmp oeq float %1227, 0.000000e+00 > %1310 = fcmp ogt float %1215, 0.000000e+00 > %1311 = select i1 %1310, float 1.000000e+00, float %1215 > %1312 = fcmp oge float %1311, 0.000000e+00 > %1313 = fcmp ogt float %1221, 0.000000e+00 > %1314 = select i1 %1313, float 1.000000e+00, float %1221 > %1315 = fcmp oge float %1314, 0.000000e+00 
> %.op87 = fmul float %1311, 0x4600000000000000 > %1316 = select i1 %1312, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1314, 0x4600000000000000 > %1317 = select i1 %1315, float %.op88, float 0xC600000000000000 > %1318 = fdiv float 1.000000e+00, %1227 > %1319 = fmul float %1215, %1318 > %1320 = fmul float %1221, %1318 > %1321 = select i1 %1308, float %1316, float %1319 > %1322 = select i1 %1309, float %1317, float %1320 > %1323 = fsub float %1259, %1321 > %1324 = fsub float %1260, %1322 > %1325 = fmul float %1323, %42 > %1326 = fmul float %1324, %43 > %1327 = fcmp oeq float %1245, 0.000000e+00 > %1328 = fcmp oeq float %1245, 0.000000e+00 > %1329 = fcmp ogt float %1233, 0.000000e+00 > %1330 = select i1 %1329, float 1.000000e+00, float %1233 > %1331 = fcmp oge float %1330, 0.000000e+00 > %1332 = fcmp ogt float %1239, 0.000000e+00 > %1333 = select i1 %1332, float 1.000000e+00, float %1239 > %1334 = fcmp oge float %1333, 0.000000e+00 > %.op89 = fmul float %1330, 0x4600000000000000 > %1335 = select i1 %1331, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1333, 0x4600000000000000 > %1336 = select i1 %1334, float %.op90, float 0xC600000000000000 > %1337 = fdiv float 1.000000e+00, %1245 > %1338 = fmul float %1233, %1337 > %1339 = fmul float %1239, %1337 > %1340 = select i1 %1327, float %1335, float %1338 > %1341 = select i1 %1328, float %1336, float %1339 > %1342 = fsub float %1274, %1340 > %1343 = fsub float %1275, %1341 > %1344 = fmul float %1342, %42 > %1345 = fmul float %1306, %42 > %1346 = fmul float %1307, %43 > %1347 = fmul float %1343, %43 > %1348 = fmul float %1345, %1345 > %1349 = fmul float %1346, %1346 > %1350 = fadd float %1348, %1349 > %1351 = fmul float %1325, %1325 > %1352 = fmul float %1326, %1326 > %1353 = fadd float %1351, %1352 > %1354 = fmul float %1344, %1344 > %1355 = fmul float %1347, %1347 > %1356 = fadd float %1354, %1355 > %1357 = call float @llvm.sqrt.f32(float %1356) > %1358 = call float @llvm.sqrt.f32(float %1350) > %1359 = call float @llvm.sqrt.f32(float %1353) > %1360 = fsub float %1155, %15 > %1361 = fsub float %1173, %15 > %1362 = fsub float %1191, %15 > %1363 = fcmp une float %16, 0.000000e+00 > br i1 %1363, label %IF69, label %ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %16, %ENDIF77 ], [ %38, %main_body ] > %temp16.0 = phi float [ %1577, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1578, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1567, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1580, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %15, %ENDIF77 ], [ %37, %main_body ] > %temp13.0 = phi float [ %1560, %ENDIF77 ], [ %36, %main_body ] > %1364 = phi i32 [ 1065353216, %ENDIF77 ], [ %672, %main_body ] > %temp10.0 = phi float [ %1359, %ENDIF77 ], [ %716, %main_body ] > %temp9.0 = phi float [ %1552, %ENDIF77 ], [ %718, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %603, %main_body ] > %temp6.0 = phi float [ %993, %ENDIF77 ], [ %685, %main_body ] > %temp5.0 = phi float [ %1547, %ENDIF77 ], [ %712, %main_body ] > %1365 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1366 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1367 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1368 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1369 = lshr i32 %5, 16 > %1370 = shl nuw nsw i32 %1369, 2 > %1371 = and i32 %6, 8191 > %1372 = 
and i32 %10, 255 > %1373 = mul nuw nsw i32 %1371, %1372 > %1374 = add nuw nsw i32 %1370, %1373 > %1375 = add nuw nsw i32 %1374, 8 > %1376 = zext i32 %1375 to i64 > %1377 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1376 > %1378 = bitcast i32 addrspace(3)* %1377 to float addrspace(3)* > store float %1365, float addrspace(3)* %1378, align 4 > %1379 = add nuw nsw i32 %1374, 9 > %1380 = zext i32 %1379 to i64 > %1381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1380 > %1382 = bitcast i32 addrspace(3)* %1381 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1382, align 4 > %1383 = add nuw nsw i32 %1374, 10 > %1384 = zext i32 %1383 to i64 > %1385 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1384 > %1386 = bitcast i32 addrspace(3)* %1385 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1386, align 4 > %1387 = add nuw nsw i32 %1374, 11 > %1388 = zext i32 %1387 to i64 > %1389 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1388 > %1390 = bitcast i32 addrspace(3)* %1389 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1390, align 4 > %1391 = lshr i32 %5, 16 > %1392 = shl nuw nsw i32 %1391, 2 > %1393 = and i32 %6, 8191 > %1394 = and i32 %10, 255 > %1395 = mul nuw nsw i32 %1393, %1394 > %1396 = add nuw nsw i32 %1392, %1395 > %1397 = add nuw nsw i32 %1396, 12 > %1398 = zext i32 %1397 to i64 > %1399 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1398 > %1400 = bitcast i32 addrspace(3)* %1399 to float addrspace(3)* > store float %1366, float addrspace(3)* %1400, align 4 > %1401 = add nuw nsw i32 %1396, 13 > %1402 = zext i32 %1401 to i64 > %1403 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1402 > %1404 = bitcast i32 addrspace(3)* %1403 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1404, align 4 > %1405 = add nuw nsw i32 %1396, 14 > %1406 = zext i32 %1405 to i64 > %1407 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1406 > %1408 = bitcast i32 addrspace(3)* %1407 to float addrspace(3)* > store float %temp10.0, float addrspace(3)* %1408, align 4 > %1409 = add nuw nsw i32 %1396, 15 > %1410 = zext i32 %1409 to i64 > %1411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1410 > store i32 %1364, i32 addrspace(3)* %1411, align 4 > %1412 = lshr i32 %5, 16 > %1413 = shl nuw nsw i32 %1412, 2 > %1414 = and i32 %6, 8191 > %1415 = and i32 %10, 255 > %1416 = mul nuw nsw i32 %1414, %1415 > %1417 = add nuw nsw i32 %1413, %1416 > %1418 = add nuw nsw i32 %1417, 16 > %1419 = zext i32 %1418 to i64 > %1420 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1419 > %1421 = bitcast i32 addrspace(3)* %1420 to float addrspace(3)* > store float %1367, float addrspace(3)* %1421, align 4 > %1422 = add nuw nsw i32 %1417, 17 > %1423 = zext i32 %1422 to i64 > %1424 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1423 > %1425 = bitcast i32 addrspace(3)* %1424 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1425, align 4 > %1426 = add nuw nsw i32 %1417, 18 > %1427 = zext i32 %1426 to i64 > %1428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1427 > %1429 = bitcast i32 addrspace(3)* %1428 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1429, align 4 > %1430 = add nuw 
nsw i32 %1417, 19 > %1431 = zext i32 %1430 to i64 > %1432 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1431 > %1433 = bitcast i32 addrspace(3)* %1432 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1433, align 4 > %1434 = lshr i32 %5, 16 > %1435 = shl nuw nsw i32 %1434, 2 > %1436 = and i32 %6, 8191 > %1437 = and i32 %10, 255 > %1438 = mul nuw nsw i32 %1436, %1437 > %1439 = add nuw nsw i32 %1435, %1438 > %1440 = add nuw nsw i32 %1439, 20 > %1441 = zext i32 %1440 to i64 > %1442 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1441 > %1443 = bitcast i32 addrspace(3)* %1442 to float addrspace(3)* > store float %1368, float addrspace(3)* %1443, align 4 > %1444 = add nuw nsw i32 %1439, 21 > %1445 = zext i32 %1444 to i64 > %1446 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1445 > %1447 = bitcast i32 addrspace(3)* %1446 to float addrspace(3)* > store float %1366, float addrspace(3)* %1447, align 4 > %1448 = add nuw nsw i32 %1439, 22 > %1449 = zext i32 %1448 to i64 > %1450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1449 > %1451 = bitcast i32 addrspace(3)* %1450 to float addrspace(3)* > store float %1367, float addrspace(3)* %1451, align 4 > %1452 = add nuw nsw i32 %1439, 23 > %1453 = zext i32 %1452 to i64 > %1454 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1453 > %1455 = bitcast i32 addrspace(3)* %1454 to float addrspace(3)* > store float %1368, float addrspace(3)* %1455, align 4 > %1456 = lshr i32 %5, 16 > %1457 = shl nuw nsw i32 %1456, 2 > %1458 = and i32 %6, 8191 > %1459 = and i32 %10, 255 > %1460 = mul nuw nsw i32 %1458, %1459 > %1461 = add nuw nsw i32 %1457, %1460 > %1462 = zext i32 %1461 to i64 > %1463 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1462 > %1464 = bitcast i32 addrspace(3)* %1463 to float addrspace(3)* > store float %1365, float addrspace(3)* %1464, align 4 > %1465 = lshr i32 %5, 16 > %1466 = shl nuw nsw i32 %1465, 2 > %1467 = and i32 %6, 8191 > %1468 = and i32 %10, 255 > %1469 = mul nuw nsw i32 %1467, %1468 > %1470 = add nuw nsw i32 %1466, %1469 > %1471 = add nuw nsw i32 %1470, 1 > %1472 = zext i32 %1471 to i64 > %1473 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1472 > %1474 = bitcast i32 addrspace(3)* %1473 to float addrspace(3)* > store float %1366, float addrspace(3)* %1474, align 4 > %1475 = lshr i32 %5, 16 > %1476 = shl nuw nsw i32 %1475, 2 > %1477 = and i32 %6, 8191 > %1478 = and i32 %10, 255 > %1479 = mul nuw nsw i32 %1477, %1478 > %1480 = add nuw nsw i32 %1476, %1479 > %1481 = add nuw nsw i32 %1480, 2 > %1482 = zext i32 %1481 to i64 > %1483 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1482 > %1484 = bitcast i32 addrspace(3)* %1483 to float addrspace(3)* > store float %1367, float addrspace(3)* %1484, align 4 > %1485 = lshr i32 %5, 16 > %1486 = shl nuw nsw i32 %1485, 2 > %1487 = and i32 %6, 8191 > %1488 = and i32 %10, 255 > %1489 = mul nuw nsw i32 %1487, %1488 > %1490 = add nuw nsw i32 %1486, %1489 > %1491 = add nuw nsw i32 %1490, 4 > %1492 = zext i32 %1491 to i64 > %1493 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1492 > %1494 = bitcast i32 addrspace(3)* %1493 to float addrspace(3)* > store float %1368, float addrspace(3)* %1494, align 4 > %1495 = and i32 %10, 255 > %1496 = lshr i32 %10, 8 > %1497 = and i32 %1496, 31 > %1498 = lshr i32 %5, 
16 > %1499 = shl nuw nsw i32 %1498, 2 > %1500 = and i32 %6, 8191 > %1501 = and i32 %10, 255 > %1502 = mul nuw nsw i32 %1500, %1501 > %1503 = add nuw nsw i32 %1499, %1502 > %1504 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1505 = bitcast i64 %1504 to <2 x i32> > %1506 = extractelement <2 x i32> %1505, i32 0 > %1507 = extractelement <2 x i32> %1505, i32 1 > %1508 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1506, 0 > %1509 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1508, i32 %1507, 1 > %1510 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1509, i32 %8, 13 > %1511 = bitcast i32 %1495 to float > %1512 = bitcast i32 %1497 to float > %1513 = bitcast i32 %1503 to float > %1514 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1510, float %1511, 14 > %1515 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1514, float %1512, 15 > %1516 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1515, float %1513, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1516 > >IF69: ; preds = %IF > %1517 = fdiv float 1.000000e+00, %16 > %1518 = fmul float %1360, %1517 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1519 = fcmp ogt float %1360, 0.000000e+00 > %1520 = select i1 %1519, float 1.000000e+00, float %1360 > %1521 = fcmp oge float %1520, 0.000000e+00 > %.op91 = fmul float %1520, 0x4600000000000000 > %1522 = select i1 %1521, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1518, %IF69 ], [ %1522, %ELSE70 ] > %1523 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1524 = fsub float 1.000000e+00, %1523 > %1525 = fmul float %1524, %1358 > %1526 = fcmp une float %16, 0.000000e+00 > br i1 %1526, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1527 = fdiv float 1.000000e+00, %16 > %1528 = fmul float %1361, %1527 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1529 = fcmp ogt float %1361, 0.000000e+00 > %1530 = select i1 %1529, float 1.000000e+00, float %1361 > %1531 = fcmp oge float %1530, 0.000000e+00 > %.op92 = fmul float %1530, 0x4600000000000000 > %1532 = select i1 %1531, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1528, %IF72 ], [ %1532, %ELSE73 ] > %1533 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1534 = fsub float 1.000000e+00, %1533 > %1535 = fmul float %1534, %1359 > %1536 = fcmp une float %16, 0.000000e+00 > br i1 %1536, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1537 = fdiv float 1.000000e+00, %16 > %1538 = fmul float %1362, %1537 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1539 = fcmp ogt float %1362, 0.000000e+00 > %1540 = select i1 %1539, float 1.000000e+00, float %1362 > %1541 = fcmp oge float %1540, 0.000000e+00 > %.op93 = fmul float %1540, 0x4600000000000000 > %1542 = select i1 %1541, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1538, %IF75 ], [ %1542, %ELSE76 ] > %1543 = call float 
@llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1544 = fsub float 1.000000e+00, %1543 > %1545 = fmul float %1544, %1357 > %1546 = fmul float %13, %19 > %1547 = fmul float %14, %20 > %1548 = call float @llvm.maxnum.f32(float %1547, float 1.000000e+00) > %1549 = fcmp oeq float %1546, 0.000000e+00 > %1550 = fcmp oeq float %1546, 0.000000e+00 > %1551 = sext i1 %1550 to i32 > %1552 = bitcast i32 %1551 to float > %1553 = fcmp ogt float %1545, 0.000000e+00 > %1554 = select i1 %1553, float 1.000000e+00, float %1545 > %1555 = fcmp oge float %1554, 0.000000e+00 > %1556 = fcmp ogt float %1525, 0.000000e+00 > %1557 = select i1 %1556, float 1.000000e+00, float %1525 > %1558 = fcmp oge float %1557, 0.000000e+00 > %.op94 = fmul float %1554, 0x4600000000000000 > %1559 = select i1 %1555, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1557, 0x4600000000000000 > %1560 = select i1 %1558, float %.op95, float 0xC600000000000000 > %1561 = fdiv float 1.000000e+00, %1546 > %1562 = fmul float %1545, %1561 > %1563 = fmul float %1525, %1561 > %1564 = select i1 %1549, float %1559, float %1562 > %1565 = select i1 %1550, float %1560, float %1563 > %1566 = call float @llvm.maxnum.f32(float %1565, float 1.000000e+00) > %1567 = call float @llvm.minnum.f32(float %1548, float %1566) > %1568 = fcmp une float %1546, 0.000000e+00 > br i1 %1568, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1569 = fdiv float 1.000000e+00, %1546 > %1570 = fmul float %1535, %1569 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1571 = fcmp ogt float %1535, 0.000000e+00 > %1572 = select i1 %1571, float 1.000000e+00, float %1535 > %1573 = fcmp oge float %1572, 0.000000e+00 > %.op96 = fmul float %1572, 0x4600000000000000 > %1574 = select i1 %1573, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1570, %IF78 ], [ %1574, %ELSE79 ] > %1575 = call float @llvm.maxnum.f32(float %1564, float 1.000000e+00) > %1576 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1577 = call float @llvm.minnum.f32(float %1548, float %1576) > %1578 = call float @llvm.minnum.f32(float %1548, float %1575) > %1579 = call float @llvm.maxnum.f32(float %1567, float %1578) > %1580 = call float @llvm.maxnum.f32(float %1579, float %1577) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL 
OUT[4], GENERIC[3] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, 
TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[0].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[0].xxxx >101: MOV OUT[4], TEMP[3] >102: MOV OUT[2], TEMP[6] >103: MOV OUT[3], TEMP[4] >104: MOV OUT[1], TEMP[5] >105: MOV OUT[0], TEMP[1] >106: END >radeonsi: Compiling shader 273 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > 
%45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = 
add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv 
float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float 
addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > %345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > 
%390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 
%480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 
addrspace(3)* %670 to float addrspace(3)*
> %672 = load float, float addrspace(3)* %671, align 4
> %673 = call float @llvm.fma.f32(float %62, float %672, float %631)
> %674 = fmul float %553, %553
> %675 = fmul float %554, %554
> %676 = fadd float %675, %674
> %677 = fmul float %673, %673
> %678 = fadd float %676, %677
> %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678)
> %680 = fmul float %679, %553
> %681 = fmul float %679, %554
> %682 = fmul float %679, %673
> %683 = call float @llvm.fma.f32(float %680, float %314, float %151)
> %684 = call float @llvm.fma.f32(float %681, float %314, float %165)
> %685 = call float @llvm.fma.f32(float %682, float %314, float %179)
> %686 = fmul float %36, %683
> %687 = fmul float %37, %684
> %688 = fadd float %686, %687
> %689 = fmul float %38, %685
> %690 = fadd float %688, %689
> %691 = fadd float %690, %39
> %692 = fmul float %40, %683
> %693 = fmul float %41, %684
> %694 = fadd float %692, %693
> %695 = fmul float %42, %685
> %696 = fadd float %694, %695
> %697 = fadd float %696, %43
> %698 = fmul float %44, %683
> %699 = fmul float %45, %684
> %700 = fadd float %698, %699
> %701 = fmul float %46, %685
> %702 = fadd float %700, %701
> %703 = fadd float %702, %47
> %704 = fmul float %48, %683
> %705 = fmul float %49, %684
> %706 = fadd float %704, %705
> %707 = fmul float %50, %685
> %708 = fadd float %706, %707
> %709 = fadd float %708, %51
> %710 = bitcast i32 %10 to float
> %711 = insertvalue <{ float, float, float }> undef, float %710, 2
> call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33)
> call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef)
> call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311)
> call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510)
> call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709)
> ret <{ float, float, float }> %711
>}
>
>; Function Attrs: nounwind readnone
>declare float @llvm.SI.load.const(<16 x i8>, i32) #0
>
>; Function Attrs: nounwind readnone
>declare float @llvm.fma.f32(float, float, float) #0
>
>; Function Attrs: nounwind readnone
>declare float @llvm.sqrt.f32(float) #0
>
>; Function Attrs: readnone
>declare float @llvm.AMDGPU.clamp.(float, float, float) #1
>
>; Function Attrs: nounwind readnone
>declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
>
>; Function Attrs: readnone
>declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
>
>declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
>attributes #0 = { nounwind readnone }
>attributes #1 = { readnone }
>
>!0 = !{!"const", null, i32 1}
>
>LLVM triggered Diagnostic Handler: LDS size exceeds device maximum
>LLVM failed to compile shader
>radeonsi: can't create a shader
>SHADER KEY
> instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
> as_es = 0
> as_ls = 0
> export_prim_id = 0
>VERT
>PROPERTY NEXT_SHADER 1
>DCL IN[0]
>DCL IN[1]
>DCL IN[2]
>DCL IN[3]
>DCL IN[4]
>DCL OUT[0], POSITION
>DCL OUT[1], GENERIC[0]
>DCL OUT[2], GENERIC[1]
>DCL OUT[3], GENERIC[2]
>DCL OUT[4], GENERIC[3]
>DCL CONST[1][0..33]
>DCL CONST[2][0..4095]
>DCL TEMP[0..20], LOCAL
>DCL ADDR[0]
>IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000}
>IMM[1] INT32 {1, 2, 4, 0}
>IMM[2] UINT32 
{1, 16, 0, 256} >IMM[3] UINT32 {272, 288, 304, 512} >IMM[4] UINT32 {336, 432, 528, 480} >IMM[5] UINT32 {496, 320, 352, 464} >IMM[6] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[7] UINT32 {384, 368, 448, 400} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] UINT32 {416, 0, 0, 0} >IMM[10] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[11] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, 
IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR 
TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, 
TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx 
>308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][16], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][17], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][18], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][19], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][32].xyzz >363: DP4 TEMP[5].x, CONST[1][21], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][27].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: MUL TEMP[8].xyz, CONST[1][30].xyzz, CONST[1][31].xyzz >368: MOV TEMP[8].w, CONST[1][30].wwww >369: ABS TEMP[9].x, TEMP[2].xxxx >370: MUL TEMP[0].x, TEMP[9].xxxx, IMM[6].xxxx >371: MIN TEMP[9].x, TEMP[0].xxxx, IMM[0].zzzz >372: ADD TEMP[0].x, -TEMP[9].xxxx, IMM[0].zzzz >373: DP3 TEMP[7].x, CONST[1][20].xyzz, TEMP[3].xyzz >374: DP3 TEMP[9].x, CONST[1][22].xyzz, TEMP[3].xyzz >375: MOV TEMP[7].z, TEMP[9].xxxx >376: DP3 TEMP[3].x, CONST[1][21].xyzz, TEMP[3].xyzz >377: MOV TEMP[7].y, TEMP[3].xxxx >378: DP3 TEMP[9].x, TEMP[7].xyzz, TEMP[7].xyzz >379: RSQ TEMP[9].x, TEMP[9].xxxx >380: MUL TEMP[10].xyz, TEMP[9].xxxx, TEMP[7].xyzz >381: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[9].xxxx, IMM[6].yyyy >382: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >383: MOV_SAT TEMP[3].x, TEMP[3].xxxx >384: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >385: DP3 TEMP[9].x, -TEMP[10].xyzz, CONST[1][29].xyzz >386: FMA TEMP[10].x, -CONST[1][24].yyyy, TEMP[9].xxxx, CONST[1][24].xxxx >387: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].zzzz 
>388: MUL TEMP[9].x, TEMP[9].xxxx, IMM[6].zzzz >389: ABS TEMP[10].x, TEMP[10].xxxx >390: LG2 TEMP[10].x, TEMP[10].xxxx >391: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].wwww >392: EX2 TEMP[10].x, TEMP[10].xxxx >393: FMA TEMP[11].x, CONST[1][24].zzzz, TEMP[10].xxxx, -CONST[1][23].zzzz >394: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][24].zzzz >395: MAX TEMP[11].x, TEMP[11].xxxx, IMM[8].xxxx >396: FMA TEMP[0].x, -TEMP[11].xxxx, TEMP[0].xxxx, TEMP[10].xxxx >397: MAX TEMP[10].x, TEMP[0].xxxx, CONST[1][28].wwww >398: FSNE TEMP[11].x, CONST[1][23].xxxx, IMM[8].xxxx >399: UIF TEMP[11].xxxx :0 >400: RCP TEMP[11].x, CONST[1][23].xxxx >401: MUL TEMP[11].x, -TEMP[1].xxxx, TEMP[11].xxxx >402: ELSE :0 >403: SSG TEMP[12].x, -TEMP[1].xxxx >404: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >405: ENDIF >406: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].zzzz >407: EX2 TEMP[11].x, TEMP[11].xxxx >408: ADD TEMP[11].x, TEMP[11].xxxx, CONST[1][24].wwww >409: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][25].yyyy >410: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].wwww >411: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[11].xxxx >412: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][23].wwww >413: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][25].xxxx >414: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[10].xxxx >415: FSNE TEMP[10].x, CONST[1][26].wwww, IMM[8].xxxx >416: UIF TEMP[10].xxxx :0 >417: RCP TEMP[10].x, CONST[1][26].wwww >418: MUL TEMP[10].x, -TEMP[1].xxxx, TEMP[10].xxxx >419: ELSE :0 >420: SSG TEMP[11].x, -TEMP[1].xxxx >421: MUL TEMP[10].x, IMM[8].yyyy, TEMP[11].xxxx >422: ENDIF >423: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][27].zzzz >424: FSNE TEMP[11].x, CONST[1][23].yyyy, IMM[8].xxxx >425: UIF TEMP[11].xxxx :0 >426: RCP TEMP[11].x, CONST[1][23].yyyy >427: MUL TEMP[11].x, TEMP[1].xxxx, TEMP[11].xxxx >428: ELSE :0 >429: SSG TEMP[12].x, TEMP[1].xxxx >430: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >431: ENDIF >432: MUL TEMP[10].x, TEMP[10].xxxx, IMM[8].zzzz >433: EX2 TEMP[10].x, TEMP[10].xxxx >434: MUL TEMP[7].xyz, TEMP[10].xxxx, CONST[1][26].xyzz >435: FMA TEMP[3].xyz, CONST[1][26].xyzz, TEMP[10].xxxx, TEMP[3].xxxx >436: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[9].xxxx, TEMP[0].xxxx >437: MUL TEMP[7].xyz, TEMP[11].xxxx, -TEMP[3].xyzz >438: ABS TEMP[2].xyz, TEMP[2].xxxx >439: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >440: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >441: EX2 TEMP[2].x, TEMP[1].xxxx >442: EX2 TEMP[2].y, TEMP[1].yyyy >443: EX2 TEMP[2].z, TEMP[1].zzzz >444: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[8].zzzz >445: LG2 TEMP[10].x, CONST[1][28].xxxx >446: LG2 TEMP[10].y, CONST[1][28].yyyy >447: LG2 TEMP[10].z, CONST[1][28].zzzz >448: MUL TEMP[4].xyz, TEMP[10].xyzz, IMM[10].xxxx >449: EX2 TEMP[10].x, TEMP[4].xxxx >450: EX2 TEMP[10].y, TEMP[4].yyyy >451: EX2 TEMP[10].z, TEMP[4].zzzz >452: EX2 TEMP[4].x, TEMP[7].xxxx >453: EX2 TEMP[4].y, TEMP[7].yyyy >454: EX2 TEMP[4].z, TEMP[7].zzzz >455: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[10].xyzz >456: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[8].xxxx >457: SSG TEMP[10].xyz, TEMP[9].xyzz >458: MUL TEMP[10].xyz, IMM[8].yyyy, TEMP[10].xyzz >459: RCP TEMP[11].x, TEMP[3].xxxx >460: RCP TEMP[11].y, TEMP[3].yyyy >461: RCP TEMP[11].z, TEMP[3].zzzz >462: MUL TEMP[3].xyz, TEMP[9].xyzz, TEMP[11].xyzz >463: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[10].xyzz, TEMP[3].xyzz >464: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >465: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >466: MOV TEMP[2].w, TEMP[2].xxxx >467: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[10].yyyy >468: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xxxx >469: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[10].zzzz, 
IMM[8].wwww >470: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[3].xyzz >471: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[10].wwww >472: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[11].xxxx >473: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[8].xxxx >474: SSG TEMP[4].xyz, TEMP[1].xyzz >475: MUL TEMP[4].xyz, IMM[8].yyyy, TEMP[4].xyzz >476: RCP TEMP[7].x, TEMP[0].xxxx >477: RCP TEMP[7].y, TEMP[0].yyyy >478: RCP TEMP[7].z, TEMP[0].zzzz >479: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >480: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >481: MOV OUT[4], IN[2] >482: MOV OUT[3], TEMP[2] >483: MOV OUT[2], TEMP[8] >484: MOV OUT[1], TEMP[6] >485: MOV OUT[0], TEMP[5] >486: END >radeonsi: Compiling shader 274 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 256) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 260) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 264) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 268) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 272) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 276) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 280) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 284) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 288) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 292) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 296) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 300) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 304) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 312) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 316) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 396) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %55 
= call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 428) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 472) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 488) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 492) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 496) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 500) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %13) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %14) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %15) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = extractelement <4 x float> %94, i32 2 > %98 = extractelement <4 x float> %94, i32 3 > %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 > %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %16) > %102 = extractelement <4 x float> %101, i32 0 > %103 = extractelement <4 x float> %101, i32 1 > %104 = extractelement <4 x float> %101, i32 2 > %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 > %107 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %17) > %108 = extractelement <4 x float> %107, i32 0 > %109 = extractelement <4 x float> %107, i32 1 > %110 = extractelement <4 x 
float> %107, i32 2 > %111 = fmul float %110, 0x406FE01000000000 > %112 = fmul float %109, 0x406FE01000000000 > %113 = fmul float %108, 0x406FE01000000000 > %114 = fptosi float %111 to i32 > %115 = fptosi float %112 to i32 > %116 = fptosi float %113 to i32 > %117 = shl i32 %114, 1 > %118 = or i32 %117, 1 > %119 = shl i32 %115, 1 > %120 = or i32 %119, 1 > %121 = shl i32 %116, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %114, 5 > %124 = or i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %124) > %126 = fmul float %102, %125 > %127 = shl i32 %115, 5 > %128 = or i32 %127, 4 > %129 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %128) > %130 = fmul float %103, %129 > %131 = shl i32 %118, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %131) > %133 = shl i32 %118, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %118, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %138) > %140 = shl i32 %118, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %118, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %146) > %148 = shl i32 %118, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %102 > %153 = fmul float %144, %102 > %154 = fmul float %153, 2.000000e+00 > %155 = shl i32 %120, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %155) > %157 = shl i32 %120, 4 > %158 = or i32 %157, 12 > %159 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %158) > %160 = fmul float %156, %159 > %161 = shl i32 %120, 4 > %162 = or i32 %161, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %162) > %164 = shl i32 %120, 4 > %165 = or i32 %164, 8 > %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %165) > %167 = fsub float -0.000000e+00, %160 > %168 = call float @llvm.fma.f32(float %163, float %166, float %167) > %169 = shl i32 %120, 4 > %170 = or i32 %169, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %170) > %172 = shl i32 %120, 4 > %173 = or i32 %172, 8 > %174 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %160) > %176 = fmul float %175, %103 > %177 = fmul float %176, 2.000000e+00 > %178 = fmul float %168, %103 > %179 = fmul float %178, 2.000000e+00 > %180 = shl i32 %118, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %181) > %183 = shl i32 %118, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %184) > %186 = shl i32 %118, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %186) > %188 = shl i32 %118, 4 > %189 = or i32 %188, 12 > %190 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %189) > %191 = fmul float %185, %190 > %192 = fmul float %185, %187 > %193 = fmul float %182, %190 > %194 = shl i32 %118, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %194) > %196 = shl i32 %118, 4 > %197 = or i32 %196, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %197) > %199 = call float @llvm.fma.f32(float %195, float %198, float %191) > %200 = fmul float %199, %102 > %201 = fmul float %200, 2.000000e+00 > %202 
= shl i32 %118, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %202) > %204 = shl i32 %118, 4 > %205 = or i32 %204, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %205) > %207 = shl i32 %118, 4 > %208 = or i32 %207, 8 > %209 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %208) > %210 = shl i32 %118, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %210) > %212 = shl i32 %118, 4 > %213 = or i32 %212, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %213) > %215 = shl i32 %118, 4 > %216 = or i32 %215, 8 > %217 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %216) > %218 = fmul float %203, %211 > %219 = fmul float %206, %214 > %220 = fmul float %209, %217 > %221 = fadd float %220, %219 > %222 = fadd float %220, %218 > %223 = fadd float %219, %218 > %224 = fsub float -0.000000e+00, %221 > %225 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fsub float -0.000000e+00, %222 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %223 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fmul float %102, %227 > %231 = shl i32 %120, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %232) > %234 = shl i32 %120, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %235) > %237 = shl i32 %120, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %237) > %239 = shl i32 %120, 4 > %240 = or i32 %239, 12 > %241 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %240) > %242 = fmul float %236, %241 > %243 = fmul float %236, %238 > %244 = fmul float %233, %241 > %245 = shl i32 %120, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %245) > %247 = shl i32 %120, 4 > %248 = or i32 %247, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %248) > %250 = call float @llvm.fma.f32(float %246, float %249, float %242) > %251 = fmul float %250, %103 > %252 = fmul float %251, 2.000000e+00 > %253 = shl i32 %120, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %253) > %255 = shl i32 %120, 4 > %256 = or i32 %255, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %256) > %258 = shl i32 %120, 4 > %259 = or i32 %258, 8 > %260 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %259) > %261 = shl i32 %120, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %261) > %263 = shl i32 %120, 4 > %264 = or i32 %263, 4 > %265 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %264) > %266 = shl i32 %120, 4 > %267 = or i32 %266, 8 > %268 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %267) > %269 = fmul float %254, %262 > %270 = fmul float %257, %265 > %271 = fmul float %260, %268 > %272 = fadd float %271, %270 > %273 = fadd float %271, %269 > %274 = fadd float %270, %269 > %275 = fsub float -0.000000e+00, %272 > %276 = call float @llvm.fma.f32(float %275, float 2.000000e+00, float 1.000000e+00) > %277 = fsub float -0.000000e+00, %273 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %274 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fmul float %103, %278 > %282 = fadd float %201, %252 > %283 = fadd float %230, %281 > %284 = fadd float %154, %179 > %285 = fadd float %126, %130 > %286 = shl i32 %116, 5 > %287 = or i32 %286, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %80, 
i32 %287) > %289 = fmul float %104, %288 > %290 = shl i32 %122, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %290) > %292 = shl i32 %122, 4 > %293 = or i32 %292, 12 > %294 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %293) > %295 = fmul float %291, %294 > %296 = shl i32 %122, 4 > %297 = or i32 %296, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %297) > %299 = shl i32 %122, 4 > %300 = or i32 %299, 8 > %301 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %300) > %302 = fsub float -0.000000e+00, %295 > %303 = call float @llvm.fma.f32(float %298, float %301, float %302) > %304 = shl i32 %122, 4 > %305 = or i32 %304, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %305) > %307 = shl i32 %122, 4 > %308 = or i32 %307, 8 > %309 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %295) > %311 = fmul float %310, %104 > %312 = fmul float %311, 2.000000e+00 > %313 = fmul float %303, %104 > %314 = fmul float %313, 2.000000e+00 > %315 = shl i32 %122, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %316) > %318 = shl i32 %122, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %319) > %321 = shl i32 %122, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %321) > %323 = shl i32 %122, 4 > %324 = or i32 %323, 12 > %325 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %324) > %326 = fmul float %320, %325 > %327 = fmul float %320, %322 > %328 = fmul float %317, %325 > %329 = shl i32 %122, 4 > %330 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %329) > %331 = shl i32 %122, 4 > %332 = or i32 %331, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %332) > %334 = call float @llvm.fma.f32(float %330, float %333, float %326) > %335 = fmul float %334, %104 > %336 = fmul float %335, 2.000000e+00 > %337 = shl i32 %122, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %337) > %339 = shl i32 %122, 4 > %340 = or i32 %339, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %340) > %342 = shl i32 %122, 4 > %343 = or i32 %342, 8 > %344 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %343) > %345 = shl i32 %122, 4 > %346 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %345) > %347 = shl i32 %122, 4 > %348 = or i32 %347, 4 > %349 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %348) > %350 = shl i32 %122, 4 > %351 = or i32 %350, 8 > %352 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %351) > %353 = fmul float %338, %346 > %354 = fmul float %341, %349 > %355 = fmul float %344, %352 > %356 = fadd float %355, %354 > %357 = fadd float %355, %353 > %358 = fadd float %354, %353 > %359 = fsub float -0.000000e+00, %356 > %360 = call float @llvm.fma.f32(float %359, float 2.000000e+00, float 1.000000e+00) > %361 = fsub float -0.000000e+00, %357 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %358 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fmul float %104, %362 > %366 = fadd float %282, %336 > %367 = fadd float %283, %365 > %368 = fadd float %284, %314 > %369 = fadd float %285, %289 > %370 = fmul float %366, %84 > %371 = fmul float %367, %85 > %372 = fadd float %370, %371 > %373 = fmul float %368, %86 > %374 = fadd float %372, %373 > %375 = fadd float %374, %369 > %376 = shl i32 %118, 4 > %377 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %376) > 
%378 = shl i32 %118, 4 > %379 = or i32 %378, 8 > %380 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %379) > %381 = fsub float -0.000000e+00, %193 > %382 = call float @llvm.fma.f32(float %377, float %380, float %381) > %383 = fmul float %382, %102 > %384 = fmul float %383, 2.000000e+00 > %385 = fmul float %152, 2.000000e+00 > %386 = shl i32 %120, 4 > %387 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %386) > %388 = shl i32 %120, 4 > %389 = or i32 %388, 8 > %390 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %389) > %391 = fsub float -0.000000e+00, %244 > %392 = call float @llvm.fma.f32(float %387, float %390, float %391) > %393 = fmul float %392, %103 > %394 = fmul float %393, 2.000000e+00 > %395 = fmul float %102, %229 > %396 = fmul float %102, %225 > %397 = fmul float %103, %280 > %398 = fmul float %103, %276 > %399 = shl i32 %114, 5 > %400 = or i32 %399, 8 > %401 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %400) > %402 = fmul float %102, %401 > %403 = shl i32 %115, 5 > %404 = or i32 %403, 8 > %405 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %404) > %406 = fmul float %103, %405 > %407 = fadd float %394, %384 > %408 = fadd float %177, %385 > %409 = fadd float %397, %395 > %410 = fadd float %406, %402 > %411 = shl i32 %122, 4 > %412 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %411) > %413 = shl i32 %122, 4 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %414) > %416 = fsub float -0.000000e+00, %328 > %417 = call float @llvm.fma.f32(float %412, float %415, float %416) > %418 = fmul float %417, %104 > %419 = fmul float %418, 2.000000e+00 > %420 = fmul float %104, %364 > %421 = fmul float %104, %360 > %422 = shl i32 %116, 5 > %423 = or i32 %422, 8 > %424 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %423) > %425 = fmul float %104, %424 > %426 = fadd float %407, %419 > %427 = fadd float %408, %312 > %428 = fadd float %409, %420 > %429 = fadd float %410, %425 > %430 = fmul float %426, %84 > %431 = fmul float %427, %85 > %432 = fadd float %430, %431 > %433 = fmul float %428, %86 > %434 = fadd float %432, %433 > %435 = fadd float %434, %429 > %436 = shl i32 %114, 5 > %437 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %436) > %438 = fmul float %102, %437 > %439 = shl i32 %115, 5 > %440 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %439) > %441 = fmul float %103, %440 > %442 = shl i32 %116, 5 > %443 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %442) > %444 = fmul float %104, %443 > %445 = shl i32 %118, 4 > %446 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %445) > %447 = shl i32 %118, 4 > %448 = or i32 %447, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %448) > %450 = fsub float -0.000000e+00, %191 > %451 = call float @llvm.fma.f32(float %446, float %449, float %450) > %452 = fadd float %193, %192 > %453 = fmul float %451, %102 > %454 = fmul float %452, %102 > %455 = fmul float %453, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = shl i32 %120, 4 > %458 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %457) > %459 = shl i32 %120, 4 > %460 = or i32 %459, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %460) > %462 = fsub float -0.000000e+00, %242 > %463 = call float @llvm.fma.f32(float %458, float %461, float %462) > %464 = shl i32 %122, 4 > %465 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %464) > %466 = shl i32 %122, 4 > %467 = or i32 %466, 4 > %468 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %467) > %469 = fsub float -0.000000e+00, 
%326 > %470 = call float @llvm.fma.f32(float %465, float %468, float %469) > %471 = fadd float %328, %327 > %472 = fmul float %463, %103 > %473 = fmul float %470, %104 > %474 = fmul float %471, %104 > %475 = fmul float %473, 2.000000e+00 > %476 = fmul float %474, 2.000000e+00 > %477 = fadd float %244, %243 > %478 = fmul float %477, %103 > %479 = fmul float %472, 2.000000e+00 > %480 = fmul float %478, 2.000000e+00 > %481 = fadd float %396, %398 > %482 = fadd float %455, %479 > %483 = fadd float %456, %480 > %484 = fadd float %438, %441 > %485 = fadd float %421, %481 > %486 = fadd float %475, %482 > %487 = fadd float %476, %483 > %488 = fadd float %444, %484 > %489 = fmul float %485, %84 > %490 = fmul float %486, %85 > %491 = fadd float %489, %490 > %492 = fmul float %487, %86 > %493 = fadd float %491, %492 > %494 = fadd float %493, %488 > %495 = fmul float %20, %494 > %496 = fmul float %21, %375 > %497 = fadd float %495, %496 > %498 = fmul float %22, %435 > %499 = fadd float %497, %498 > %500 = fadd float %499, %23 > %501 = fmul float %24, %494 > %502 = fmul float %25, %375 > %503 = fadd float %501, %502 > %504 = fmul float %26, %435 > %505 = fadd float %503, %504 > %506 = fadd float %505, %27 > %507 = fmul float %28, %494 > %508 = fmul float %29, %375 > %509 = fadd float %507, %508 > %510 = fmul float %30, %435 > %511 = fadd float %509, %510 > %512 = fadd float %511, %31 > %513 = fmul float %32, %494 > %514 = fmul float %33, %375 > %515 = fadd float %513, %514 > %516 = fmul float %34, %435 > %517 = fadd float %515, %516 > %518 = fadd float %517, %35 > %519 = fsub float %76, %494 > %520 = fsub float %77, %375 > %521 = fsub float %78, %435 > %522 = fmul float %39, %494 > %523 = fmul float %40, %375 > %524 = fadd float %522, %523 > %525 = fmul float %41, %435 > %526 = fadd float %524, %525 > %527 = fadd float %526, %42 > %528 = fadd float %527, %61 > %529 = fmul float %69, %73 > %530 = fmul float %70, %74 > %531 = fmul float %71, %75 > %532 = call float @llvm.fabs.f32(float %518) > %533 = fmul float %532, 0x3EF4F8B580000000 > %534 = call float @llvm.minnum.f32(float %533, float 1.000000e+00) > %535 = fsub float 1.000000e+00, %534 > %536 = fmul float %36, %519 > %537 = fmul float %37, %520 > %538 = fadd float %537, %536 > %539 = fmul float %38, %521 > %540 = fadd float %538, %539 > %541 = fmul float %43, %519 > %542 = fmul float %44, %520 > %543 = fadd float %542, %541 > %544 = fmul float %45, %521 > %545 = fadd float %543, %544 > %546 = fmul float %39, %519 > %547 = fmul float %40, %520 > %548 = fadd float %547, %546 > %549 = fmul float %41, %521 > %550 = fadd float %548, %549 > %551 = fmul float %540, %540 > %552 = fmul float %550, %550 > %553 = fadd float %552, %551 > %554 = fmul float %545, %545 > %555 = fadd float %553, %554 > %556 = call float @llvm.AMDGPU.rsq.clamped.f32(float %555) > %557 = fmul float %556, %540 > %558 = fmul float %556, %550 > %559 = fmul float %556, %545 > %560 = fsub float -0.000000e+00, %550 > %561 = call float @llvm.fma.f32(float %560, float %556, float 0xBFC3333340000000) > %562 = fsub float 1.000000e+00, %561 > %563 = call float @llvm.AMDGPU.clamp.(float %562, float 0.000000e+00, float 1.000000e+00) > %564 = fmul float %563, %563 > %565 = fmul float %557, %66 > %566 = fsub float -0.000000e+00, %565 > %567 = fmul float %558, %67 > %568 = fsub float %566, %567 > %569 = fmul float %559, %68 > %570 = fsub float %568, %569 > %571 = fsub float -0.000000e+00, %51 > %572 = call float @llvm.fma.f32(float %571, float %570, float %50) > %573 = call float 
@llvm.fma.f32(float %570, float %570, float 1.000000e+00) > %574 = fmul float %573, 0x3FAE8EC8A0000000 > %575 = call float @llvm.fabs.f32(float %572) > %576 = call float @llvm.log2.f32(float %575) > %577 = fmul float %576, -1.500000e+00 > %578 = call float @llvm.exp2.f32(float %577) > %579 = fsub float -0.000000e+00, %48 > %580 = call float @llvm.fma.f32(float %52, float %578, float %579) > %581 = fmul float %578, %52 > %582 = call float @llvm.maxnum.f32(float %580, float 0.000000e+00) > %583 = fsub float -0.000000e+00, %582 > %584 = call float @llvm.fma.f32(float %583, float %535, float %581) > %585 = call float @llvm.maxnum.f32(float %584, float %65) > %586 = fcmp une float %46, 0.000000e+00 > br i1 %586, label %IF, label %ELSE > >IF: ; preds = %main_body > %587 = fdiv float 1.000000e+00, %46 > %588 = fmul float %528, %587 > %589 = fsub float -0.000000e+00, %588 > br label %ENDIF > >ELSE: ; preds = %main_body > %590 = fsub float -0.000000e+00, %528 > %591 = fcmp olt float %528, -0.000000e+00 > %592 = select i1 %591, float 1.000000e+00, float %590 > %593 = fcmp oge float %592, 0.000000e+00 > %.op = fmul float %592, 0x4600000000000000 > %594 = select i1 %593, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %589, %IF ], [ %594, %ELSE ] > %595 = fmul float %temp44.0, 0x3FF7154760000000 > %596 = call float @llvm.exp2.f32(float %595) > %597 = fadd float %596, %53 > %598 = fmul float %597, %55 > %599 = fmul float %598, 5.000000e-01 > %600 = fmul float %564, %599 > %601 = call float @llvm.minnum.f32(float %600, float %49) > %602 = call float @llvm.maxnum.f32(float %601, float %54) > %603 = fmul float %602, %585 > %604 = fcmp une float %59, 0.000000e+00 > br i1 %604, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %605 = fdiv float 1.000000e+00, %59 > %606 = fmul float %528, %605 > %607 = fsub float -0.000000e+00, %606 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %608 = fsub float -0.000000e+00, %528 > %609 = fcmp olt float %528, -0.000000e+00 > %610 = select i1 %609, float 1.000000e+00, float %608 > %611 = fcmp oge float %610, 0.000000e+00 > %.op164 = fmul float %610, 0x4600000000000000 > %612 = select i1 %611, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp40.0 = phi float [ %607, %IF159 ], [ %612, %ELSE160 ] > %613 = fsub float %60, %528 > %614 = fcmp une float %47, 0.000000e+00 > br i1 %614, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %615 = fdiv float 1.000000e+00, %47 > %616 = fmul float %613, %615 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %617 = fcmp ogt float %613, 0.000000e+00 > %618 = select i1 %617, float 1.000000e+00, float %613 > %619 = fcmp oge float %618, 0.000000e+00 > %.op165 = fmul float %618, 0x4600000000000000 > %620 = select i1 %619, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp44.1 = phi float [ %616, %IF162 ], [ %620, %ELSE163 ] > %621 = fmul float %temp40.0, 0x3FF7154760000000 > %622 = call float @llvm.exp2.f32(float %621) > %623 = fmul float %622, %56 > %624 = fmul float %622, %57 > %625 = fmul float %622, %58 > %626 = call float @llvm.fma.f32(float %56, float %622, float %602) > %627 = call float @llvm.fma.f32(float %57, float %622, float %602) > %628 = call float @llvm.fma.f32(float %58, float %622, float %602) > %629 = call float @llvm.fma.f32(float %623, float %574, float %603) > %630 = call float @llvm.fma.f32(float %624, float %574, 
float %603) > %631 = call float @llvm.fma.f32(float %625, float %574, float %603) > %632 = fmul float %626, %temp44.1 > %633 = fmul float %627, %temp44.1 > %634 = fmul float %628, %temp44.1 > %635 = call float @llvm.fabs.f32(float %518) > %636 = call float @llvm.fabs.f32(float %518) > %637 = call float @llvm.fabs.f32(float %518) > %638 = fmul float %626, %635 > %639 = fmul float %627, %636 > %640 = fmul float %628, %637 > %641 = fmul float %638, 0xBFF7154760000000 > %642 = fmul float %639, 0xBFF7154760000000 > %643 = fmul float %640, 0xBFF7154760000000 > %644 = call float @llvm.exp2.f32(float %641) > %645 = call float @llvm.exp2.f32(float %642) > %646 = call float @llvm.exp2.f32(float %643) > %647 = fmul float %632, 0xBFF7154760000000 > %648 = fmul float %633, 0xBFF7154760000000 > %649 = fmul float %634, 0xBFF7154760000000 > %650 = call float @llvm.log2.f32(float %62) > %651 = call float @llvm.log2.f32(float %63) > %652 = call float @llvm.log2.f32(float %64) > %653 = fmul float %650, 0x3FDD1745E0000000 > %654 = fmul float %651, 0x3FDD1745E0000000 > %655 = fmul float %652, 0x3FDD1745E0000000 > %656 = call float @llvm.exp2.f32(float %653) > %657 = call float @llvm.exp2.f32(float %654) > %658 = call float @llvm.exp2.f32(float %655) > %659 = call float @llvm.exp2.f32(float %647) > %660 = call float @llvm.exp2.f32(float %648) > %661 = call float @llvm.exp2.f32(float %649) > %662 = fmul float %659, %656 > %663 = fmul float %660, %657 > %664 = fmul float %661, %658 > %665 = fcmp oeq float %626, 0.000000e+00 > %666 = fcmp oeq float %627, 0.000000e+00 > %667 = fcmp oeq float %628, 0.000000e+00 > %668 = fcmp ogt float %629, 0.000000e+00 > %669 = select i1 %668, float 1.000000e+00, float %629 > %670 = fcmp oge float %669, 0.000000e+00 > %671 = fcmp ogt float %630, 0.000000e+00 > %672 = select i1 %671, float 1.000000e+00, float %630 > %673 = fcmp oge float %672, 0.000000e+00 > %674 = fcmp ogt float %631, 0.000000e+00 > %675 = select i1 %674, float 1.000000e+00, float %631 > %676 = fcmp oge float %675, 0.000000e+00 > %.op166 = fmul float %669, 0x4600000000000000 > %677 = select i1 %670, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %672, 0x4600000000000000 > %678 = select i1 %673, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %675, 0x4600000000000000 > %679 = select i1 %676, float %.op168, float 0xC600000000000000 > %680 = fdiv float 1.000000e+00, %626 > %681 = fdiv float 1.000000e+00, %627 > %682 = fdiv float 1.000000e+00, %628 > %683 = fmul float %629, %680 > %684 = fmul float %630, %681 > %685 = fmul float %631, %682 > %686 = select i1 %665, float %677, float %683 > %687 = select i1 %666, float %678, float %684 > %688 = select i1 %667, float %679, float %685 > %689 = fmul float %686, %662 > %690 = fmul float %687, %663 > %691 = fmul float %688, %664 > %692 = fsub float 1.000000e+00, %644 > %693 = fsub float 1.000000e+00, %645 > %694 = fsub float 1.000000e+00, %646 > %695 = call float @llvm.fma.f32(float %689, float %692, float 0xBF70624DE0000000) > %696 = call float @llvm.fma.f32(float %690, float %693, float 0xBF70624DE0000000) > %697 = call float @llvm.fma.f32(float %691, float %694, float 0xBF70624DE0000000) > %698 = call float @llvm.maxnum.f32(float %695, float 0.000000e+00) > %699 = call float @llvm.maxnum.f32(float %696, float 0.000000e+00) > %700 = call float @llvm.maxnum.f32(float %697, float 0.000000e+00) > %701 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 5.000000e-01) > %702 = call float @llvm.fma.f32(float %699, float 
0x4018CCCCC0000000, float 5.000000e-01) > %703 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 5.000000e-01) > %704 = fmul float %698, %701 > %705 = fmul float %699, %702 > %706 = fmul float %700, %703 > %707 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %708 = call float @llvm.fma.f32(float %699, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %709 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %710 = call float @llvm.fma.f32(float %698, float %707, float 0x3FAEB851E0000000) > %711 = call float @llvm.fma.f32(float %699, float %708, float 0x3FAEB851E0000000) > %712 = call float @llvm.fma.f32(float %700, float %709, float 0x3FAEB851E0000000) > %713 = fcmp oeq float %710, 0.000000e+00 > %714 = fcmp oeq float %711, 0.000000e+00 > %715 = fcmp oeq float %712, 0.000000e+00 > %716 = fcmp ogt float %704, 0.000000e+00 > %717 = select i1 %716, float 1.000000e+00, float %704 > %718 = fcmp oge float %717, 0.000000e+00 > %719 = fcmp ogt float %705, 0.000000e+00 > %720 = select i1 %719, float 1.000000e+00, float %705 > %721 = fcmp oge float %720, 0.000000e+00 > %722 = fcmp ogt float %706, 0.000000e+00 > %723 = select i1 %722, float 1.000000e+00, float %706 > %724 = fcmp oge float %723, 0.000000e+00 > %.op169 = fmul float %717, 0x4600000000000000 > %725 = select i1 %718, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %720, 0x4600000000000000 > %726 = select i1 %721, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %723, 0x4600000000000000 > %727 = select i1 %724, float %.op171, float 0xC600000000000000 > %728 = fdiv float 1.000000e+00, %710 > %729 = fdiv float 1.000000e+00, %711 > %730 = fdiv float 1.000000e+00, %712 > %731 = fmul float %704, %728 > %732 = fmul float %705, %729 > %733 = fmul float %706, %730 > %734 = select i1 %713, float %725, float %731 > %735 = select i1 %714, float %726, float %732 > %736 = select i1 %715, float %727, float %733 > %737 = bitcast i32 %11 to float > %738 = insertvalue <{ float, float, float }> undef, float %737, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %90, float %91, float %424, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %529, float %530, float %531, float %72) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %734, float %735, float %736, float %644) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %96, float %97, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %500, float %506, float %512, float %518) > ret <{ float, float, float }> %738 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone 
>declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], BUFFER, FLOAT >DCL TEMP[0..2], LOCAL >IMM[0] INT32 {0, 0, 0, 0} >IMM[1] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[3] > 3: MUL TEMP[0], TEMP[0], IN[1] > 4: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[2].wwww, IN[2].xyzz > 5: MUL TEMP[0].x, TEMP[0].wwww, IN[2].wwww > 6: MOV TEMP[0].w, TEMP[0].xxxx > 7: MOV TEMP[2].x, IMM[0].xxxx > 8: MOV TEMP[2].w, IMM[1].xxxx > 9: TXF TEMP[2].x, TEMP[2], SAMP[1], BUFFER > 10: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 11: MOV OUT[0], TEMP[0] > 12: END >radeonsi: Compiling shader 275 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = bitcast <8 x i32> addrspace(2)* %32 to <2 x i128> addrspace(2)* > %34 = load <2 x i128>, <2 x i128> addrspace(2)* %33, align 32, !tbaa !0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> 
%8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %49 = bitcast float %35 to i32 > %50 = bitcast float %36 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = fmul float %58, %37 > %63 = fmul float %59, %38 > %64 = fmul float %60, %39 > %65 = fmul float %61, %40 > %66 = call float @llvm.fma.f32(float %62, float %44, float %41) > %67 = call float @llvm.fma.f32(float %63, float %44, float %42) > %68 = call float @llvm.fma.f32(float %64, float %44, float %43) > %69 = fmul float %65, %44 > %70 = extractelement <2 x i128> %34, i32 1 > %71 = bitcast i128 %70 to <16 x i8> > %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 0) > %73 = extractelement <4 x float> %72, i32 0 > %74 = fmul float %73, %66 > %75 = fmul float %73, %67 > %76 = fmul float %73, %68 > %77 = bitcast float %5 to i32 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %77, 10 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %74, 11 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %75, 12 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %76, 13 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %69, 14 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > 
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..33] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 256, 272, 288} >IMM[2] UINT32 {304, 336, 432, 512} >IMM[3] UINT32 {480, 496, 320, 352} >IMM[4] UINT32 {464, 384, 368, 448} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {400, 416, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][21], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][27].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][32].xyzz > 14: MUL TEMP[6].xyz, CONST[1][30].xyzz, CONST[1][31].xyzz > 15: MOV TEMP[6].w, CONST[1][30].wwww > 16: DP3 TEMP[1].x, CONST[1][20].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][22].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][21].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][29].xyzz > 29: FMA TEMP[9].x, -CONST[1][24].yyyy, TEMP[7].xxxx, CONST[1][24].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][24].zzzz, TEMP[9].xxxx, -CONST[1][23].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][24].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][28].wwww > 46: FSNE TEMP[10].x, CONST[1][23].xxxx, IMM[5].xxxx > 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][23].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][24].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][25].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][23].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][25].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE 
TEMP[10].x, CONST[1][26].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][26].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][27].zzzz > 72: FSNE TEMP[11].x, CONST[1][23].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][23].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][26].xyzz > 83: FMA TEMP[5].xyz, CONST[1][26].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][28].xxxx >102: LG2 TEMP[5].y, CONST[1][28].yyyy >103: LG2 TEMP[5].z, CONST[1][28].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV OUT[4], IN[2] >130: MOV OUT[3], TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 276 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > 
%17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 268) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %73 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 504) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, %80 > %96 = fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, %142 > %145 = fmul float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float 
-0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > %190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] > %213 = fsub float %58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float 
@llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, float 0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 = select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 = call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) 
> %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 %316, float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, float %335, float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: 
readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..34] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 256, 272, 288} >IMM[2] UINT32 {304, 336, 432, 528} >IMM[3] UINT32 {496, 512, 320, 352} >IMM[4] UINT32 {464, 384, 368, 448} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {400, 416, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][21], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][27].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][33].xyzz > 14: MUL TEMP[6].xyz, CONST[1][31].xyzz, CONST[1][32].xyzz > 15: MOV TEMP[6].w, CONST[1][31].wwww > 16: DP3 TEMP[1].x, CONST[1][20].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][22].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][21].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][29].xyzz > 29: FMA TEMP[9].x, -CONST[1][24].yyyy, TEMP[7].xxxx, CONST[1][24].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][24].zzzz, TEMP[9].xxxx, -CONST[1][23].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][24].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][28].wwww > 46: FSNE TEMP[10].x, CONST[1][23].xxxx, IMM[5].xxxx 
> 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][23].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][24].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][25].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][23].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][25].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE TEMP[10].x, CONST[1][26].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][26].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][27].zzzz > 72: FSNE TEMP[11].x, CONST[1][23].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][23].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][26].xyzz > 83: FMA TEMP[5].xyz, CONST[1][26].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][28].xxxx >102: LG2 TEMP[5].y, CONST[1][28].yyyy >103: LG2 TEMP[5].z, CONST[1][28].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV OUT[4], IN[2] >130: MOV OUT[3], 
TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 277 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 268) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %62 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 456) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, %80 > %96 = fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, %142 > %145 = fmul 
float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > %190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] > %213 = fsub float 
%58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, float 0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 = select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 = call float 
@llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 %316, float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, float %335, 
float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..30] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 480, 240, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[3] > 3: MUL TEMP[0], TEMP[0], IN[1] > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 5: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 6: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][30].xyzz > 7: MOV TEMP[0].xyz, TEMP[0].xyzx > 8: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 9: MOV TEMP[0].w, TEMP[1].xxxx > 10: MOV OUT[0], TEMP[0] > 11: END >radeonsi: Compiling shader 278 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 484) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 488) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, 
i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %49 = bitcast float %38 to i32 > %50 = bitcast float %39 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = fmul float %58, %40 > %63 = fmul float %59, %41 > %64 = fmul float %60, %42 > %65 = fmul float %61, %43 > %66 = fmul float %62, %44 > %67 = fmul float %63, %44 > %68 = fmul float %64, %44 > %69 = fmul float %65, %66 > %70 = fmul float %65, %67 > %71 = fmul float %65, %68 > %72 = fmul float %69, %26 > %73 = fmul float %70, %27 > %74 = fadd float %73, %72 > %75 = fmul float %71, %28 > %76 = fadd float %74, %75 > %77 = fmul float %76, %25 > %78 = bitcast float %5 to i32 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %78, 10 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %69, 11 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %70, 12 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %71, 13 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %77, 14 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float }> %83, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..38] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 320, 336, 352} >IMM[2] UINT32 {368, 400, 496, 592} >IMM[3] UINT32 {560, 576, 384, 416} >IMM[4] UINT32 {528, 448, 432, 512} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {464, 480, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][23], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][25], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][31].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][37].xyzz > 14: MUL TEMP[6].xyz, CONST[1][35].xyzz, CONST[1][36].xyzz > 15: MOV TEMP[6].w, CONST[1][35].wwww > 16: DP3 TEMP[1].x, CONST[1][24].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][26].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][25].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][33].xyzz > 29: FMA TEMP[9].x, -CONST[1][28].yyyy, TEMP[7].xxxx, CONST[1][28].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][28].zzzz, TEMP[9].xxxx, -CONST[1][27].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][28].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][32].wwww > 46: FSNE TEMP[10].x, 
CONST[1][27].xxxx, IMM[5].xxxx > 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][27].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][28].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][29].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][27].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][29].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE TEMP[10].x, CONST[1][30].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][30].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][31].zzzz > 72: FSNE TEMP[11].x, CONST[1][27].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][27].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][30].xyzz > 83: FMA TEMP[5].xyz, CONST[1][30].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][32].xxxx >102: LG2 TEMP[5].y, CONST[1][32].yyyy >103: LG2 TEMP[5].z, CONST[1][32].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV 
OUT[4], IN[2] >130: MOV OUT[3], TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 279 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > 
%62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 572) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 600) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, %80 > %96 = fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, 
%142 > %145 = fmul float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > %190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] 
> %213 = fsub float %58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, float 0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 = select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 
= call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 %316, float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, 
float %335, float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..34] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 288, 240, 304} >IMM[1] UINT32 {256, 272, 544, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} > 0: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][18].xyyy, CONST[1][15].zwww > 1: FMA TEMP[0].xy, CONST[1][19].wwww, CONST[1][16].xyyy, TEMP[0].xyyy > 2: MOV TEMP[1].xy, TEMP[0].xyyy > 3: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 4: FMA TEMP[1].xy, TEMP[1].ywww, IMM[2].xxxx, IMM[2].yyyy > 5: MOV TEMP[0].x, TEMP[1].xyxx > 6: MOV TEMP[0].z, -TEMP[1].yyyy > 7: MUL TEMP[0].xy, TEMP[0].xzzz, CONST[1][19].xyyy > 8: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][17].xyyy, TEMP[0].xyyy > 9: MOV TEMP[1].xy, TEMP[0].xyyy > 10: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 11: MUL TEMP[0], TEMP[1], IN[3] > 12: MUL TEMP[0], TEMP[0], IN[1] > 13: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 14: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 15: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][34].xyzz > 16: MOV TEMP[1].w, TEMP[1].xxxx > 17: MOV TEMP[1].xyz, TEMP[0].xyzx > 18: MOV OUT[0], TEMP[1] > 19: END >radeonsi: Compiling shader 280 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, 
float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 544) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 548) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 552) > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 3 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 7 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %68 = call float @llvm.fma.f32(float %57, float %31, float %25) > %69 = call float @llvm.fma.f32(float %58, float %32, float %26) > %70 = call float @llvm.fma.f32(float %35, float %27, float %68) > %71 = call float @llvm.fma.f32(float %35, float %28, float %69) > %72 = bitcast float %70 to i32 > %73 = bitcast float %71 to i32 > %74 = insertelement <2 x i32> undef, i32 %72, i32 0 > %75 = insertelement <2 x i32> %74, i32 %73, i32 1 > %76 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %75, 
<8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %77 = extractelement <4 x float> %76, i32 1 > %78 = extractelement <4 x float> %76, i32 3 > %79 = call float @llvm.fma.f32(float %77, float 2.000000e+00, float -1.000000e+00) > %80 = call float @llvm.fma.f32(float %78, float 2.000000e+00, float -1.000000e+00) > %81 = fmul float %79, %33 > %82 = fmul float %80, %34 > %83 = fsub float -0.000000e+00, %82 > %84 = call float @llvm.fma.f32(float %57, float %29, float %81) > %85 = call float @llvm.fma.f32(float %58, float %30, float %83) > %86 = bitcast float %84 to i32 > %87 = bitcast float %85 to i32 > %88 = insertelement <2 x i32> undef, i32 %86, i32 0 > %89 = insertelement <2 x i32> %88, i32 %87, i32 1 > %90 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %89, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %91, %64 > %96 = fmul float %92, %65 > %97 = fmul float %93, %66 > %98 = fmul float %94, %67 > %99 = fmul float %95, %59 > %100 = fmul float %96, %60 > %101 = fmul float %97, %61 > %102 = fmul float %98, %62 > %103 = fmul float %99, %63 > %104 = fmul float %100, %63 > %105 = fmul float %101, %63 > %106 = fmul float %102, %103 > %107 = fmul float %102, %104 > %108 = fmul float %102, %105 > %109 = fmul float %106, %36 > %110 = fmul float %107, %37 > %111 = fadd float %110, %109 > %112 = fmul float %108, %38 > %113 = fadd float %111, %112 > %114 = bitcast float %5 to i32 > %115 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %114, 10 > %116 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %115, float %106, 11 > %117 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %116, float %107, 12 > %118 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %117, float %108, 13 > %119 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %118, float %113, 14 > %120 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %119, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %120 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } 
>attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..37] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 320} >IMM[3] UINT32 {336, 352, 368, 576} >IMM[4] UINT32 {400, 496, 592, 544} >IMM[5] UINT32 {560, 384, 416, 528} >IMM[6] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[7] UINT32 {448, 432, 512, 464} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] UINT32 {480, 0, 0, 0} >IMM[10] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[11] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, 
TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL 
ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: 
MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, 
CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][20], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][21], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][22], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][23], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][36].xyzz >363: DP4 TEMP[5].x, CONST[1][25], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][31].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: MUL TEMP[7].xy, TEMP[2].xxxx, CONST[1][37].xyyy >368: MUL TEMP[8].xy, CONST[1][37].xyyy, IMM[0].zwww >369: FMA TEMP[8].xy, TEMP[0].xyyy, TEMP[8].xyyy, TEMP[7].xyyy >370: MOV TEMP[8].zw, TEMP[0].wwzw >371: MUL TEMP[9].xyz, CONST[1][34].xyzz, CONST[1][35].xyzz >372: MOV TEMP[9].w, CONST[1][34].wwww >373: ABS TEMP[10].x, TEMP[2].xxxx >374: MUL TEMP[0].x, TEMP[10].xxxx, IMM[6].xxxx >375: MIN TEMP[10].x, 
TEMP[0].xxxx, IMM[0].zzzz >376: ADD TEMP[0].x, -TEMP[10].xxxx, IMM[0].zzzz >377: DP3 TEMP[7].x, CONST[1][24].xyzz, TEMP[3].xyzz >378: DP3 TEMP[10].x, CONST[1][26].xyzz, TEMP[3].xyzz >379: MOV TEMP[7].z, TEMP[10].xxxx >380: DP3 TEMP[3].x, CONST[1][25].xyzz, TEMP[3].xyzz >381: MOV TEMP[7].y, TEMP[3].xxxx >382: DP3 TEMP[10].x, TEMP[7].xyzz, TEMP[7].xyzz >383: RSQ TEMP[10].x, TEMP[10].xxxx >384: MUL TEMP[11].xyz, TEMP[10].xxxx, TEMP[7].xyzz >385: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[10].xxxx, IMM[6].yyyy >386: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >387: MOV_SAT TEMP[3].x, TEMP[3].xxxx >388: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >389: DP3 TEMP[10].x, -TEMP[11].xyzz, CONST[1][33].xyzz >390: FMA TEMP[11].x, -CONST[1][28].yyyy, TEMP[10].xxxx, CONST[1][28].xxxx >391: FMA TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx, IMM[0].zzzz >392: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].zzzz >393: ABS TEMP[11].x, TEMP[11].xxxx >394: LG2 TEMP[11].x, TEMP[11].xxxx >395: MUL TEMP[11].x, TEMP[11].xxxx, IMM[6].wwww >396: EX2 TEMP[11].x, TEMP[11].xxxx >397: FMA TEMP[12].x, CONST[1][28].zzzz, TEMP[11].xxxx, -CONST[1][27].zzzz >398: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][28].zzzz >399: MAX TEMP[12].x, TEMP[12].xxxx, IMM[8].xxxx >400: FMA TEMP[0].x, -TEMP[12].xxxx, TEMP[0].xxxx, TEMP[11].xxxx >401: MAX TEMP[11].x, TEMP[0].xxxx, CONST[1][32].wwww >402: FSNE TEMP[12].x, CONST[1][27].xxxx, IMM[8].xxxx >403: UIF TEMP[12].xxxx :0 >404: RCP TEMP[12].x, CONST[1][27].xxxx >405: MUL TEMP[12].x, -TEMP[1].xxxx, TEMP[12].xxxx >406: ELSE :0 >407: SSG TEMP[13].x, -TEMP[1].xxxx >408: MUL TEMP[12].x, IMM[8].yyyy, TEMP[13].xxxx >409: ENDIF >410: MUL TEMP[12].x, TEMP[12].xxxx, IMM[8].zzzz >411: EX2 TEMP[12].x, TEMP[12].xxxx >412: ADD TEMP[12].x, TEMP[12].xxxx, CONST[1][28].wwww >413: MUL TEMP[12].x, TEMP[12].xxxx, CONST[1][29].yyyy >414: MUL TEMP[12].x, TEMP[12].xxxx, IMM[8].wwww >415: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[12].xxxx >416: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][27].wwww >417: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][29].xxxx >418: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[11].xxxx >419: FSNE TEMP[11].x, CONST[1][30].wwww, IMM[8].xxxx >420: UIF TEMP[11].xxxx :0 >421: RCP TEMP[11].x, CONST[1][30].wwww >422: MUL TEMP[11].x, -TEMP[1].xxxx, TEMP[11].xxxx >423: ELSE :0 >424: SSG TEMP[12].x, -TEMP[1].xxxx >425: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >426: ENDIF >427: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][31].zzzz >428: FSNE TEMP[12].x, CONST[1][27].yyyy, IMM[8].xxxx >429: UIF TEMP[12].xxxx :0 >430: RCP TEMP[12].x, CONST[1][27].yyyy >431: MUL TEMP[12].x, TEMP[1].xxxx, TEMP[12].xxxx >432: ELSE :0 >433: SSG TEMP[13].x, TEMP[1].xxxx >434: MUL TEMP[12].x, IMM[8].yyyy, TEMP[13].xxxx >435: ENDIF >436: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].zzzz >437: EX2 TEMP[11].x, TEMP[11].xxxx >438: MUL TEMP[7].xyz, TEMP[11].xxxx, CONST[1][30].xyzz >439: FMA TEMP[3].xyz, CONST[1][30].xyzz, TEMP[11].xxxx, TEMP[3].xxxx >440: FMA TEMP[10].xyz, TEMP[7].xyzz, TEMP[10].xxxx, TEMP[0].xxxx >441: MUL TEMP[7].xyz, TEMP[12].xxxx, -TEMP[3].xyzz >442: ABS TEMP[2].xyz, TEMP[2].xxxx >443: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >444: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >445: EX2 TEMP[2].x, TEMP[1].xxxx >446: EX2 TEMP[2].y, TEMP[1].yyyy >447: EX2 TEMP[2].z, TEMP[1].zzzz >448: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[8].zzzz >449: LG2 TEMP[11].x, CONST[1][32].xxxx >450: LG2 TEMP[11].y, CONST[1][32].yyyy >451: LG2 TEMP[11].z, CONST[1][32].zzzz >452: MUL TEMP[4].xyz, TEMP[11].xyzz, IMM[10].xxxx >453: EX2 TEMP[11].x, TEMP[4].xxxx >454: EX2 TEMP[11].y, TEMP[4].yyyy >455: EX2 
TEMP[11].z, TEMP[4].zzzz >456: EX2 TEMP[4].x, TEMP[7].xxxx >457: EX2 TEMP[4].y, TEMP[7].yyyy >458: EX2 TEMP[4].z, TEMP[7].zzzz >459: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[11].xyzz >460: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[8].xxxx >461: SSG TEMP[11].xyz, TEMP[10].xyzz >462: MUL TEMP[11].xyz, IMM[8].yyyy, TEMP[11].xyzz >463: RCP TEMP[12].x, TEMP[3].xxxx >464: RCP TEMP[12].y, TEMP[3].yyyy >465: RCP TEMP[12].z, TEMP[3].zzzz >466: MUL TEMP[3].xyz, TEMP[10].xyzz, TEMP[12].xyzz >467: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[11].xyzz, TEMP[3].xyzz >468: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >469: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >470: MOV TEMP[2].w, TEMP[2].xxxx >471: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[10].yyyy >472: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xxxx >473: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[8].wwww >474: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[3].xyzz >475: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[10].wwww >476: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[11].xxxx >477: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[8].xxxx >478: SSG TEMP[4].xyz, TEMP[1].xyzz >479: MUL TEMP[4].xyz, IMM[8].yyyy, TEMP[4].xyzz >480: RCP TEMP[7].x, TEMP[0].xxxx >481: RCP TEMP[7].y, TEMP[0].yyyy >482: RCP TEMP[7].z, TEMP[0].zzzz >483: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >484: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >485: MOV OUT[5], IN[2] >486: MOV OUT[4], TEMP[2] >487: MOV OUT[3], TEMP[9] >488: MOV OUT[2], TEMP[8] >489: MOV OUT[1], TEMP[6] >490: MOV OUT[0], TEMP[5] >491: END >radeonsi: Compiling shader 281 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 364) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %41 = call float 
@llvm.SI.load.const(<16 x i8> %19, i32 408) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 412) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 432) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 436) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 488) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 492) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 524) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 544) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 548) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 552) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 556) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 560) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 564) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 568) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 576) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 580) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 584) > %79 = call float @llvm.SI.load.const(<16 x i8> %19, i32 592) > %80 = call float @llvm.SI.load.const(<16 x i8> %19, i32 596) > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %13) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 > %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %14) > %92 = extractelement <4 x float> %91, i32 0 > %93 = extractelement <4 x float> %91, i32 1 > %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 > %96 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %15) > %97 = extractelement <4 x float> %96, i32 0 > %98 = extractelement <4 x float> %96, i32 1 > %99 = extractelement <4 x float> %96, i32 2 > %100 = extractelement <4 x float> %96, i32 3 > %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 > %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %16) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 > %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %17) > %110 = extractelement <4 x float> %109, i32 0 > %111 = extractelement <4 x float> %109, i32 1 > %112 = extractelement <4 x float> %109, i32 2 > %113 = fmul float %112, 0x406FE01000000000 > %114 = fmul float %111, 0x406FE01000000000 > %115 = fmul float %110, 0x406FE01000000000 > %116 = fptosi float %113 to i32 > %117 = fptosi float %114 to i32 > %118 = fptosi float %115 to i32 > %119 = shl i32 %116, 1 > %120 = or i32 %119, 1 > %121 = shl i32 %117, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %118, 1 > %124 = or i32 %123, 1 > %125 = shl i32 %116, 5 > %126 = or i32 %125, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %126) > %128 = fmul float %104, %127 > %129 = shl i32 %117, 5 > %130 = or i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %130) > %132 = fmul float %105, %131 > %133 = shl i32 %120, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %133) > %135 = shl i32 %120, 4 > %136 = or i32 %135, 12 > %137 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %136) > %138 = fmul float %134, %137 > %139 = shl i32 %120, 4 > %140 = or i32 %139, 4 > %141 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %140) > %142 = shl i32 %120, 4 > %143 = or i32 %142, 8 > %144 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %143) > %145 = fsub float -0.000000e+00, %138 > %146 = call float @llvm.fma.f32(float %141, float %144, float %145) > %147 = shl i32 %120, 4 > %148 = or i32 %147, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %148) > %150 = shl i32 %120, 4 > %151 = or i32 %150, 8 > %152 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %151) > %153 = call float @llvm.fma.f32(float %149, float %152, float %138) > %154 = fmul float %153, %104 > %155 = fmul float %146, %104 > %156 = fmul float %155, 2.000000e+00 > %157 = shl i32 %122, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %157) > %159 = shl i32 %122, 4 > %160 = or i32 %159, 12 > %161 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %160) > %162 = fmul float %158, %161 > %163 = shl i32 %122, 4 > %164 = or i32 %163, 4 > %165 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %164) > %166 = shl i32 %122, 4 > %167 = or i32 %166, 8 > %168 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %167) > %169 = fsub float -0.000000e+00, %162 > %170 = call float @llvm.fma.f32(float %165, float %168, float %169) > %171 = shl i32 %122, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %172) > %174 = shl i32 %122, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %175) > %177 = call float @llvm.fma.f32(float %173, float %176, float %162) > %178 = fmul float 
%177, %105 > %179 = fmul float %178, 2.000000e+00 > %180 = fmul float %170, %105 > %181 = fmul float %180, 2.000000e+00 > %182 = shl i32 %120, 4 > %183 = or i32 %182, 4 > %184 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %183) > %185 = shl i32 %120, 4 > %186 = or i32 %185, 8 > %187 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %186) > %188 = shl i32 %120, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %188) > %190 = shl i32 %120, 4 > %191 = or i32 %190, 12 > %192 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %191) > %193 = fmul float %187, %192 > %194 = fmul float %187, %189 > %195 = fmul float %184, %192 > %196 = shl i32 %120, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %196) > %198 = shl i32 %120, 4 > %199 = or i32 %198, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %199) > %201 = call float @llvm.fma.f32(float %197, float %200, float %193) > %202 = fmul float %201, %104 > %203 = fmul float %202, 2.000000e+00 > %204 = shl i32 %120, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %204) > %206 = shl i32 %120, 4 > %207 = or i32 %206, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %207) > %209 = shl i32 %120, 4 > %210 = or i32 %209, 8 > %211 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %210) > %212 = shl i32 %120, 4 > %213 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %212) > %214 = shl i32 %120, 4 > %215 = or i32 %214, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %215) > %217 = shl i32 %120, 4 > %218 = or i32 %217, 8 > %219 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %218) > %220 = fmul float %205, %213 > %221 = fmul float %208, %216 > %222 = fmul float %211, %219 > %223 = fadd float %222, %221 > %224 = fadd float %222, %220 > %225 = fadd float %221, %220 > %226 = fsub float -0.000000e+00, %223 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %224 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fsub float -0.000000e+00, %225 > %231 = call float @llvm.fma.f32(float %230, float 2.000000e+00, float 1.000000e+00) > %232 = fmul float %104, %229 > %233 = shl i32 %122, 4 > %234 = or i32 %233, 4 > %235 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %234) > %236 = shl i32 %122, 4 > %237 = or i32 %236, 8 > %238 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %237) > %239 = shl i32 %122, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %239) > %241 = shl i32 %122, 4 > %242 = or i32 %241, 12 > %243 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %242) > %244 = fmul float %238, %243 > %245 = fmul float %238, %240 > %246 = fmul float %235, %243 > %247 = shl i32 %122, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %247) > %249 = shl i32 %122, 4 > %250 = or i32 %249, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %250) > %252 = call float @llvm.fma.f32(float %248, float %251, float %244) > %253 = fmul float %252, %105 > %254 = fmul float %253, 2.000000e+00 > %255 = shl i32 %122, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %255) > %257 = shl i32 %122, 4 > %258 = or i32 %257, 4 > %259 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %258) > %260 = shl i32 %122, 4 > %261 = or i32 %260, 8 > %262 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %261) > %263 = shl i32 %122, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %263) > %265 = shl i32 %122, 4 > %266 = or i32 %265, 4 > %267 
= call float @llvm.SI.load.const(<16 x i8> %82, i32 %266) > %268 = shl i32 %122, 4 > %269 = or i32 %268, 8 > %270 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %269) > %271 = fmul float %256, %264 > %272 = fmul float %259, %267 > %273 = fmul float %262, %270 > %274 = fadd float %273, %272 > %275 = fadd float %273, %271 > %276 = fadd float %272, %271 > %277 = fsub float -0.000000e+00, %274 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %275 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fsub float -0.000000e+00, %276 > %282 = call float @llvm.fma.f32(float %281, float 2.000000e+00, float 1.000000e+00) > %283 = fmul float %105, %280 > %284 = fadd float %203, %254 > %285 = fadd float %232, %283 > %286 = fadd float %156, %181 > %287 = fadd float %128, %132 > %288 = shl i32 %118, 5 > %289 = or i32 %288, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %289) > %291 = fmul float %106, %290 > %292 = shl i32 %124, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %292) > %294 = shl i32 %124, 4 > %295 = or i32 %294, 12 > %296 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %295) > %297 = fmul float %293, %296 > %298 = shl i32 %124, 4 > %299 = or i32 %298, 4 > %300 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %299) > %301 = shl i32 %124, 4 > %302 = or i32 %301, 8 > %303 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %302) > %304 = fsub float -0.000000e+00, %297 > %305 = call float @llvm.fma.f32(float %300, float %303, float %304) > %306 = shl i32 %124, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %307) > %309 = shl i32 %124, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %310) > %312 = call float @llvm.fma.f32(float %308, float %311, float %297) > %313 = fmul float %312, %106 > %314 = fmul float %313, 2.000000e+00 > %315 = fmul float %305, %106 > %316 = fmul float %315, 2.000000e+00 > %317 = shl i32 %124, 4 > %318 = or i32 %317, 4 > %319 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %318) > %320 = shl i32 %124, 4 > %321 = or i32 %320, 8 > %322 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %321) > %323 = shl i32 %124, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %323) > %325 = shl i32 %124, 4 > %326 = or i32 %325, 12 > %327 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %326) > %328 = fmul float %322, %327 > %329 = fmul float %322, %324 > %330 = fmul float %319, %327 > %331 = shl i32 %124, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %331) > %333 = shl i32 %124, 4 > %334 = or i32 %333, 4 > %335 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %334) > %336 = call float @llvm.fma.f32(float %332, float %335, float %328) > %337 = fmul float %336, %106 > %338 = fmul float %337, 2.000000e+00 > %339 = shl i32 %124, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %339) > %341 = shl i32 %124, 4 > %342 = or i32 %341, 4 > %343 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %342) > %344 = shl i32 %124, 4 > %345 = or i32 %344, 8 > %346 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %345) > %347 = shl i32 %124, 4 > %348 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %347) > %349 = shl i32 %124, 4 > %350 = or i32 %349, 4 > %351 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %350) > %352 = shl i32 %124, 4 > %353 = or i32 %352, 8 > %354 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %353) 
> %355 = fmul float %340, %348 > %356 = fmul float %343, %351 > %357 = fmul float %346, %354 > %358 = fadd float %357, %356 > %359 = fadd float %357, %355 > %360 = fadd float %356, %355 > %361 = fsub float -0.000000e+00, %358 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %359 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fsub float -0.000000e+00, %360 > %366 = call float @llvm.fma.f32(float %365, float 2.000000e+00, float 1.000000e+00) > %367 = fmul float %106, %364 > %368 = fadd float %284, %338 > %369 = fadd float %285, %367 > %370 = fadd float %286, %316 > %371 = fadd float %287, %291 > %372 = fmul float %368, %86 > %373 = fmul float %369, %87 > %374 = fadd float %372, %373 > %375 = fmul float %370, %88 > %376 = fadd float %374, %375 > %377 = fadd float %376, %371 > %378 = shl i32 %120, 4 > %379 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %378) > %380 = shl i32 %120, 4 > %381 = or i32 %380, 8 > %382 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %381) > %383 = fsub float -0.000000e+00, %195 > %384 = call float @llvm.fma.f32(float %379, float %382, float %383) > %385 = fmul float %384, %104 > %386 = fmul float %385, 2.000000e+00 > %387 = fmul float %154, 2.000000e+00 > %388 = shl i32 %122, 4 > %389 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %388) > %390 = shl i32 %122, 4 > %391 = or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %391) > %393 = fsub float -0.000000e+00, %246 > %394 = call float @llvm.fma.f32(float %389, float %392, float %393) > %395 = fmul float %394, %105 > %396 = fmul float %395, 2.000000e+00 > %397 = fmul float %104, %231 > %398 = fmul float %104, %227 > %399 = fmul float %105, %282 > %400 = fmul float %105, %278 > %401 = shl i32 %116, 5 > %402 = or i32 %401, 8 > %403 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %402) > %404 = fmul float %104, %403 > %405 = shl i32 %117, 5 > %406 = or i32 %405, 8 > %407 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %406) > %408 = fmul float %105, %407 > %409 = fadd float %396, %386 > %410 = fadd float %179, %387 > %411 = fadd float %399, %397 > %412 = fadd float %408, %404 > %413 = shl i32 %124, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %413) > %415 = shl i32 %124, 4 > %416 = or i32 %415, 8 > %417 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %416) > %418 = fsub float -0.000000e+00, %330 > %419 = call float @llvm.fma.f32(float %414, float %417, float %418) > %420 = fmul float %419, %106 > %421 = fmul float %420, 2.000000e+00 > %422 = fmul float %106, %366 > %423 = fmul float %106, %362 > %424 = shl i32 %118, 5 > %425 = or i32 %424, 8 > %426 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %425) > %427 = fmul float %106, %426 > %428 = fadd float %409, %421 > %429 = fadd float %410, %314 > %430 = fadd float %411, %422 > %431 = fadd float %412, %427 > %432 = fmul float %428, %86 > %433 = fmul float %429, %87 > %434 = fadd float %432, %433 > %435 = fmul float %430, %88 > %436 = fadd float %434, %435 > %437 = fadd float %436, %431 > %438 = shl i32 %116, 5 > %439 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %438) > %440 = fmul float %104, %439 > %441 = shl i32 %117, 5 > %442 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %441) > %443 = fmul float %105, %442 > %444 = shl i32 %118, 5 > %445 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %444) > %446 = fmul float %106, %445 > %447 = shl i32 %120, 4 > %448 = call float 
@llvm.SI.load.const(<16 x i8> %82, i32 %447) > %449 = shl i32 %120, 4 > %450 = or i32 %449, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %450) > %452 = fsub float -0.000000e+00, %193 > %453 = call float @llvm.fma.f32(float %448, float %451, float %452) > %454 = fadd float %195, %194 > %455 = fmul float %453, %104 > %456 = fmul float %454, %104 > %457 = fmul float %455, 2.000000e+00 > %458 = fmul float %456, 2.000000e+00 > %459 = shl i32 %122, 4 > %460 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %459) > %461 = shl i32 %122, 4 > %462 = or i32 %461, 4 > %463 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %462) > %464 = fsub float -0.000000e+00, %244 > %465 = call float @llvm.fma.f32(float %460, float %463, float %464) > %466 = shl i32 %124, 4 > %467 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %466) > %468 = shl i32 %124, 4 > %469 = or i32 %468, 4 > %470 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %469) > %471 = fsub float -0.000000e+00, %328 > %472 = call float @llvm.fma.f32(float %467, float %470, float %471) > %473 = fadd float %330, %329 > %474 = fmul float %465, %105 > %475 = fmul float %472, %106 > %476 = fmul float %473, %106 > %477 = fmul float %475, 2.000000e+00 > %478 = fmul float %476, 2.000000e+00 > %479 = fadd float %246, %245 > %480 = fmul float %479, %105 > %481 = fmul float %474, 2.000000e+00 > %482 = fmul float %480, 2.000000e+00 > %483 = fadd float %398, %400 > %484 = fadd float %457, %481 > %485 = fadd float %458, %482 > %486 = fadd float %440, %443 > %487 = fadd float %423, %483 > %488 = fadd float %477, %484 > %489 = fadd float %478, %485 > %490 = fadd float %446, %486 > %491 = fmul float %487, %86 > %492 = fmul float %488, %87 > %493 = fadd float %491, %492 > %494 = fmul float %489, %88 > %495 = fadd float %493, %494 > %496 = fadd float %495, %490 > %497 = fmul float %20, %496 > %498 = fmul float %21, %377 > %499 = fadd float %497, %498 > %500 = fmul float %22, %437 > %501 = fadd float %499, %500 > %502 = fadd float %501, %23 > %503 = fmul float %24, %496 > %504 = fmul float %25, %377 > %505 = fadd float %503, %504 > %506 = fmul float %26, %437 > %507 = fadd float %505, %506 > %508 = fadd float %507, %27 > %509 = fmul float %28, %496 > %510 = fmul float %29, %377 > %511 = fadd float %509, %510 > %512 = fmul float %30, %437 > %513 = fadd float %511, %512 > %514 = fadd float %513, %31 > %515 = fmul float %32, %496 > %516 = fmul float %33, %377 > %517 = fadd float %515, %516 > %518 = fmul float %34, %437 > %519 = fadd float %517, %518 > %520 = fadd float %519, %35 > %521 = fsub float %76, %496 > %522 = fsub float %77, %377 > %523 = fsub float %78, %437 > %524 = fmul float %39, %496 > %525 = fmul float %40, %377 > %526 = fadd float %524, %525 > %527 = fmul float %41, %437 > %528 = fadd float %526, %527 > %529 = fadd float %528, %42 > %530 = fadd float %529, %61 > %531 = fmul float %520, %79 > %532 = fmul float %520, %80 > %533 = fsub float -0.000000e+00, %80 > %534 = call float @llvm.fma.f32(float %502, float %79, float %531) > %535 = call float @llvm.fma.f32(float %508, float %533, float %532) > %536 = fmul float %69, %73 > %537 = fmul float %70, %74 > %538 = fmul float %71, %75 > %539 = call float @llvm.fabs.f32(float %520) > %540 = fmul float %539, 0x3EF4F8B580000000 > %541 = call float @llvm.minnum.f32(float %540, float 1.000000e+00) > %542 = fsub float 1.000000e+00, %541 > %543 = fmul float %36, %521 > %544 = fmul float %37, %522 > %545 = fadd float %544, %543 > %546 = fmul float %38, %523 > %547 = fadd float %545, %546 > %548 
= fmul float %43, %521 > %549 = fmul float %44, %522 > %550 = fadd float %549, %548 > %551 = fmul float %45, %523 > %552 = fadd float %550, %551 > %553 = fmul float %39, %521 > %554 = fmul float %40, %522 > %555 = fadd float %554, %553 > %556 = fmul float %41, %523 > %557 = fadd float %555, %556 > %558 = fmul float %547, %547 > %559 = fmul float %557, %557 > %560 = fadd float %559, %558 > %561 = fmul float %552, %552 > %562 = fadd float %560, %561 > %563 = call float @llvm.AMDGPU.rsq.clamped.f32(float %562) > %564 = fmul float %563, %547 > %565 = fmul float %563, %557 > %566 = fmul float %563, %552 > %567 = fsub float -0.000000e+00, %557 > %568 = call float @llvm.fma.f32(float %567, float %563, float 0xBFC3333340000000) > %569 = fsub float 1.000000e+00, %568 > %570 = call float @llvm.AMDGPU.clamp.(float %569, float 0.000000e+00, float 1.000000e+00) > %571 = fmul float %570, %570 > %572 = fmul float %564, %66 > %573 = fsub float -0.000000e+00, %572 > %574 = fmul float %565, %67 > %575 = fsub float %573, %574 > %576 = fmul float %566, %68 > %577 = fsub float %575, %576 > %578 = fsub float -0.000000e+00, %51 > %579 = call float @llvm.fma.f32(float %578, float %577, float %50) > %580 = call float @llvm.fma.f32(float %577, float %577, float 1.000000e+00) > %581 = fmul float %580, 0x3FAE8EC8A0000000 > %582 = call float @llvm.fabs.f32(float %579) > %583 = call float @llvm.log2.f32(float %582) > %584 = fmul float %583, -1.500000e+00 > %585 = call float @llvm.exp2.f32(float %584) > %586 = fsub float -0.000000e+00, %48 > %587 = call float @llvm.fma.f32(float %52, float %585, float %586) > %588 = fmul float %585, %52 > %589 = call float @llvm.maxnum.f32(float %587, float 0.000000e+00) > %590 = fsub float -0.000000e+00, %589 > %591 = call float @llvm.fma.f32(float %590, float %542, float %588) > %592 = call float @llvm.maxnum.f32(float %591, float %65) > %593 = fcmp une float %46, 0.000000e+00 > br i1 %593, label %IF, label %ELSE > >IF: ; preds = %main_body > %594 = fdiv float 1.000000e+00, %46 > %595 = fmul float %530, %594 > %596 = fsub float -0.000000e+00, %595 > br label %ENDIF > >ELSE: ; preds = %main_body > %597 = fsub float -0.000000e+00, %530 > %598 = fcmp olt float %530, -0.000000e+00 > %599 = select i1 %598, float 1.000000e+00, float %597 > %600 = fcmp oge float %599, 0.000000e+00 > %.op = fmul float %599, 0x4600000000000000 > %601 = select i1 %600, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp48.0 = phi float [ %596, %IF ], [ %601, %ELSE ] > %602 = fmul float %temp48.0, 0x3FF7154760000000 > %603 = call float @llvm.exp2.f32(float %602) > %604 = fadd float %603, %53 > %605 = fmul float %604, %55 > %606 = fmul float %605, 5.000000e-01 > %607 = fmul float %571, %606 > %608 = call float @llvm.minnum.f32(float %607, float %49) > %609 = call float @llvm.maxnum.f32(float %608, float %54) > %610 = fmul float %609, %592 > %611 = fcmp une float %59, 0.000000e+00 > br i1 %611, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %612 = fdiv float 1.000000e+00, %59 > %613 = fmul float %530, %612 > %614 = fsub float -0.000000e+00, %613 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %615 = fsub float -0.000000e+00, %530 > %616 = fcmp olt float %530, -0.000000e+00 > %617 = select i1 %616, float 1.000000e+00, float %615 > %618 = fcmp oge float %617, 0.000000e+00 > %.op164 = fmul float %617, 0x4600000000000000 > %619 = select i1 %618, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp44.0 = phi 
float [ %614, %IF159 ], [ %619, %ELSE160 ] > %620 = fsub float %60, %530 > %621 = fcmp une float %47, 0.000000e+00 > br i1 %621, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %622 = fdiv float 1.000000e+00, %47 > %623 = fmul float %620, %622 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %624 = fcmp ogt float %620, 0.000000e+00 > %625 = select i1 %624, float 1.000000e+00, float %620 > %626 = fcmp oge float %625, 0.000000e+00 > %.op165 = fmul float %625, 0x4600000000000000 > %627 = select i1 %626, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp48.1 = phi float [ %623, %IF162 ], [ %627, %ELSE163 ] > %628 = fmul float %temp44.0, 0x3FF7154760000000 > %629 = call float @llvm.exp2.f32(float %628) > %630 = fmul float %629, %56 > %631 = fmul float %629, %57 > %632 = fmul float %629, %58 > %633 = call float @llvm.fma.f32(float %56, float %629, float %609) > %634 = call float @llvm.fma.f32(float %57, float %629, float %609) > %635 = call float @llvm.fma.f32(float %58, float %629, float %609) > %636 = call float @llvm.fma.f32(float %630, float %581, float %610) > %637 = call float @llvm.fma.f32(float %631, float %581, float %610) > %638 = call float @llvm.fma.f32(float %632, float %581, float %610) > %639 = fmul float %633, %temp48.1 > %640 = fmul float %634, %temp48.1 > %641 = fmul float %635, %temp48.1 > %642 = call float @llvm.fabs.f32(float %520) > %643 = call float @llvm.fabs.f32(float %520) > %644 = call float @llvm.fabs.f32(float %520) > %645 = fmul float %633, %642 > %646 = fmul float %634, %643 > %647 = fmul float %635, %644 > %648 = fmul float %645, 0xBFF7154760000000 > %649 = fmul float %646, 0xBFF7154760000000 > %650 = fmul float %647, 0xBFF7154760000000 > %651 = call float @llvm.exp2.f32(float %648) > %652 = call float @llvm.exp2.f32(float %649) > %653 = call float @llvm.exp2.f32(float %650) > %654 = fmul float %639, 0xBFF7154760000000 > %655 = fmul float %640, 0xBFF7154760000000 > %656 = fmul float %641, 0xBFF7154760000000 > %657 = call float @llvm.log2.f32(float %62) > %658 = call float @llvm.log2.f32(float %63) > %659 = call float @llvm.log2.f32(float %64) > %660 = fmul float %657, 0x3FDD1745E0000000 > %661 = fmul float %658, 0x3FDD1745E0000000 > %662 = fmul float %659, 0x3FDD1745E0000000 > %663 = call float @llvm.exp2.f32(float %660) > %664 = call float @llvm.exp2.f32(float %661) > %665 = call float @llvm.exp2.f32(float %662) > %666 = call float @llvm.exp2.f32(float %654) > %667 = call float @llvm.exp2.f32(float %655) > %668 = call float @llvm.exp2.f32(float %656) > %669 = fmul float %666, %663 > %670 = fmul float %667, %664 > %671 = fmul float %668, %665 > %672 = fcmp oeq float %633, 0.000000e+00 > %673 = fcmp oeq float %634, 0.000000e+00 > %674 = fcmp oeq float %635, 0.000000e+00 > %675 = fcmp ogt float %636, 0.000000e+00 > %676 = select i1 %675, float 1.000000e+00, float %636 > %677 = fcmp oge float %676, 0.000000e+00 > %678 = fcmp ogt float %637, 0.000000e+00 > %679 = select i1 %678, float 1.000000e+00, float %637 > %680 = fcmp oge float %679, 0.000000e+00 > %681 = fcmp ogt float %638, 0.000000e+00 > %682 = select i1 %681, float 1.000000e+00, float %638 > %683 = fcmp oge float %682, 0.000000e+00 > %.op166 = fmul float %676, 0x4600000000000000 > %684 = select i1 %677, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %679, 0x4600000000000000 > %685 = select i1 %680, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %682, 0x4600000000000000 > %686 = select i1 %683, float 
%.op168, float 0xC600000000000000 > %687 = fdiv float 1.000000e+00, %633 > %688 = fdiv float 1.000000e+00, %634 > %689 = fdiv float 1.000000e+00, %635 > %690 = fmul float %636, %687 > %691 = fmul float %637, %688 > %692 = fmul float %638, %689 > %693 = select i1 %672, float %684, float %690 > %694 = select i1 %673, float %685, float %691 > %695 = select i1 %674, float %686, float %692 > %696 = fmul float %693, %669 > %697 = fmul float %694, %670 > %698 = fmul float %695, %671 > %699 = fsub float 1.000000e+00, %651 > %700 = fsub float 1.000000e+00, %652 > %701 = fsub float 1.000000e+00, %653 > %702 = call float @llvm.fma.f32(float %696, float %699, float 0xBF70624DE0000000) > %703 = call float @llvm.fma.f32(float %697, float %700, float 0xBF70624DE0000000) > %704 = call float @llvm.fma.f32(float %698, float %701, float 0xBF70624DE0000000) > %705 = call float @llvm.maxnum.f32(float %702, float 0.000000e+00) > %706 = call float @llvm.maxnum.f32(float %703, float 0.000000e+00) > %707 = call float @llvm.maxnum.f32(float %704, float 0.000000e+00) > %708 = call float @llvm.fma.f32(float %705, float 0x4018CCCCC0000000, float 5.000000e-01) > %709 = call float @llvm.fma.f32(float %706, float 0x4018CCCCC0000000, float 5.000000e-01) > %710 = call float @llvm.fma.f32(float %707, float 0x4018CCCCC0000000, float 5.000000e-01) > %711 = fmul float %705, %708 > %712 = fmul float %706, %709 > %713 = fmul float %707, %710 > %714 = call float @llvm.fma.f32(float %705, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %715 = call float @llvm.fma.f32(float %706, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %716 = call float @llvm.fma.f32(float %707, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %717 = call float @llvm.fma.f32(float %705, float %714, float 0x3FAEB851E0000000) > %718 = call float @llvm.fma.f32(float %706, float %715, float 0x3FAEB851E0000000) > %719 = call float @llvm.fma.f32(float %707, float %716, float 0x3FAEB851E0000000) > %720 = fcmp oeq float %717, 0.000000e+00 > %721 = fcmp oeq float %718, 0.000000e+00 > %722 = fcmp oeq float %719, 0.000000e+00 > %723 = fcmp ogt float %711, 0.000000e+00 > %724 = select i1 %723, float 1.000000e+00, float %711 > %725 = fcmp oge float %724, 0.000000e+00 > %726 = fcmp ogt float %712, 0.000000e+00 > %727 = select i1 %726, float 1.000000e+00, float %712 > %728 = fcmp oge float %727, 0.000000e+00 > %729 = fcmp ogt float %713, 0.000000e+00 > %730 = select i1 %729, float 1.000000e+00, float %713 > %731 = fcmp oge float %730, 0.000000e+00 > %.op169 = fmul float %724, 0x4600000000000000 > %732 = select i1 %725, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %727, 0x4600000000000000 > %733 = select i1 %728, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %730, 0x4600000000000000 > %734 = select i1 %731, float %.op171, float 0xC600000000000000 > %735 = fdiv float 1.000000e+00, %717 > %736 = fdiv float 1.000000e+00, %718 > %737 = fdiv float 1.000000e+00, %719 > %738 = fmul float %711, %735 > %739 = fmul float %712, %736 > %740 = fmul float %713, %737 > %741 = select i1 %720, float %732, float %738 > %742 = select i1 %721, float %733, float %739 > %743 = select i1 %722, float %734, float %740 > %744 = bitcast i32 %11 to float > %745 = insertvalue <{ float, float, float }> undef, float %744, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %92, float %93, float %426, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %534, float %535, float %514, float %520) > call 
void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %536, float %537, float %538, float %72) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %741, float %742, float %743, float %651) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %97, float %98, float %99, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %502, float %508, float %514, float %520) > ret <{ float, float, float }> %745 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL CONST[1][0..19] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 240, 288, 256} >IMM[2] UINT32 {304, 272, 0, 0} >IMM[3] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} >IMM[4] INT32 {0, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: FMA TEMP[1].xy, IN[0].xyyy, CONST[1][18].xyyy, CONST[1][16].xyyy > 26: FMA TEMP[1].xy, 
CONST[1][19].wwww, CONST[1][16].zwww, TEMP[1].xyyy > 27: MOV TEMP[1].xy, TEMP[1].xyyy > 28: TEX TEMP[1].yw, TEMP[1], SAMP[1], 2D > 29: FMA TEMP[1].xy, TEMP[1].ywww, IMM[0].wwww, IMM[3].xxxx > 30: MOV TEMP[2].x, TEMP[1].xyxx > 31: MOV TEMP[2].z, -TEMP[1].yyyy > 32: MUL TEMP[1].xy, TEMP[2].xzzz, CONST[1][19].xyyy > 33: FMA TEMP[1].xy, IN[0].xyyy, CONST[1][17].xyyy, TEMP[1].xyyy > 34: MOV TEMP[1].xy, TEMP[1].xyyy > 35: TEX TEMP[1], TEMP[1], SAMP[2], 2D > 36: MOV TEMP[2].xyz, TEMP[1].xyzx > 37: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 38: MOV TEMP[2].w, TEMP[1].xxxx > 39: MUL TEMP[0], TEMP[2], IN[4] > 40: MUL TEMP[0], TEMP[0], IN[2] > 41: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[3].wwww, IN[3].xyzz > 42: MUL TEMP[0].x, TEMP[0].wwww, IN[3].wwww > 43: MOV TEMP[0].w, TEMP[0].xxxx > 44: MOV TEMP[2].x, IMM[4].xxxx > 45: MOV TEMP[2].w, IMM[1].xxxx > 46: TXF TEMP[2].x, TEMP[2], SAMP[3], BUFFER > 47: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 48: MOV OUT[0], TEMP[0] > 49: END >radeonsi: Compiling shader 282 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 7 > %51 = load <4 x 
i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 11 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %66 = bitcast <8 x i32> addrspace(2)* %65 to <2 x i128> addrspace(2)* > %67 = load <2 x i128>, <2 x i128> addrspace(2)* %66, align 32, !tbaa !0 > %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %85 = fcmp oeq float %72, 0.000000e+00 > %86 = fcmp oeq float %72, 0.000000e+00 > %87 = fcmp ogt float %70, 0.000000e+00 > %88 = select i1 %87, float 1.000000e+00, float %70 > %89 = fcmp oge float %88, 0.000000e+00 > %90 = fcmp ogt float %71, 0.000000e+00 > %91 = select i1 %90, float 1.000000e+00, float %71 > %92 = fcmp oge float %91, 0.000000e+00 > %.op = fmul float %88, 0x4600000000000000 > %93 = select i1 %89, float %.op, float 0xC600000000000000 > %.op12 = fmul float %91, 0x4600000000000000 > %94 = select i1 %92, float %.op12, float 0xC600000000000000 > %95 = fdiv float 1.000000e+00, %72 > %96 = fmul float %70, %95 > %97 = fmul float %71, %95 > %98 = select i1 %85, float %93, float %96 > %99 = select i1 %86, float %94, float %97 > %100 = bitcast float %98 to i32 > %101 = bitcast float %99 to i32 > %102 = insertelement <2 x i32> undef, i32 %100, i32 0 > %103 = insertelement <2 x i32> %102, i32 %101, i32 1 > %104 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %103, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %105 = extractelement <4 x float> %104, i32 0 > %106 = fsub float %72, %105 > %107 = fcmp une float %25, 0.000000e+00 > %108 = call float @llvm.fabs.f32(float %106) > br i1 %107, label %IF, 
label %ELSE > >IF: ; preds = %main_body > %109 = fdiv float 1.000000e+00, %25 > %110 = fmul float %108, %109 > br label %ENDIF > >ELSE: ; preds = %main_body > %111 = fcmp one float %106, 0.000000e+00 > %112 = select i1 %111, float 1.000000e+00, float %108 > %113 = fcmp oge float %112, 0.000000e+00 > %.op13 = fmul float %112, 0x4600000000000000 > %114 = select i1 %113, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %110, %IF ], [ %114, %ELSE ] > %115 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %116 = fsub float 1.000000e+00, %115 > %117 = call float @llvm.log2.f32(float %116) > %118 = fmul float %117, %26 > %119 = call float @llvm.exp2.f32(float %118) > %120 = fsub float 1.000000e+00, %119 > %121 = call float @llvm.fma.f32(float %68, float %33, float %27) > %122 = call float @llvm.fma.f32(float %69, float %34, float %28) > %123 = call float @llvm.fma.f32(float %37, float %29, float %121) > %124 = call float @llvm.fma.f32(float %37, float %30, float %122) > %125 = bitcast float %123 to i32 > %126 = bitcast float %124 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 1 > %131 = extractelement <4 x float> %129, i32 3 > %132 = call float @llvm.fma.f32(float %130, float 2.000000e+00, float -1.000000e+00) > %133 = call float @llvm.fma.f32(float %131, float 2.000000e+00, float -1.000000e+00) > %134 = fmul float %132, %35 > %135 = fmul float %133, %36 > %136 = fsub float -0.000000e+00, %135 > %137 = call float @llvm.fma.f32(float %68, float %31, float %134) > %138 = call float @llvm.fma.f32(float %69, float %32, float %136) > %139 = bitcast float %137 to i32 > %140 = bitcast float %138 to i32 > %141 = insertelement <2 x i32> undef, i32 %139, i32 0 > %142 = insertelement <2 x i32> %141, i32 %140, i32 1 > %143 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %142, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %144 = extractelement <4 x float> %143, i32 0 > %145 = extractelement <4 x float> %143, i32 1 > %146 = extractelement <4 x float> %143, i32 2 > %147 = extractelement <4 x float> %143, i32 3 > %148 = fmul float %120, %147 > %149 = fmul float %144, %81 > %150 = fmul float %145, %82 > %151 = fmul float %146, %83 > %152 = fmul float %148, %84 > %153 = fmul float %149, %73 > %154 = fmul float %150, %74 > %155 = fmul float %151, %75 > %156 = fmul float %152, %76 > %157 = call float @llvm.fma.f32(float %153, float %80, float %77) > %158 = call float @llvm.fma.f32(float %154, float %80, float %78) > %159 = call float @llvm.fma.f32(float %155, float %80, float %79) > %160 = fmul float %156, %80 > %161 = extractelement <2 x i128> %67, i32 1 > %162 = bitcast i128 %161 to <16 x i8> > %163 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %162, i32 0, i32 0) > %164 = extractelement <4 x float> %163, i32 0 > %165 = fmul float %164, %157 > %166 = fmul float %164, %158 > %167 = fmul float %164, %159 > %168 = bitcast float %5 to i32 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %168, 10 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %165, 11 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %166, 12 > %172 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171, float %167, 13 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %172, float %160, 14 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..39] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 352, 368, 384} >IMM[2] UINT32 {400, 432, 528, 608} >IMM[3] UINT32 {576, 592, 416, 448} >IMM[4] UINT32 {560, 480, 464, 544} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {496, 512, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][22], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][23], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][24], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][25], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][27], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][33].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][38].xyzz > 14: MUL TEMP[6].xyz, CONST[1][36].xyzz, CONST[1][37].xyzz > 15: MOV TEMP[6].w, CONST[1][36].wwww > 16: 
DP3 TEMP[1].x, CONST[1][26].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][28].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][27].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][35].xyzz > 29: FMA TEMP[9].x, -CONST[1][30].yyyy, TEMP[7].xxxx, CONST[1][30].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][30].zzzz, TEMP[9].xxxx, -CONST[1][29].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][30].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][34].wwww > 46: FSNE TEMP[10].x, CONST[1][29].xxxx, IMM[5].xxxx > 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][29].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][30].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][31].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][29].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][31].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE TEMP[10].x, CONST[1][32].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][32].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][33].zzzz > 72: FSNE TEMP[11].x, CONST[1][29].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][29].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][32].xyzz > 83: FMA TEMP[5].xyz, CONST[1][32].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL 
TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][34].xxxx >102: LG2 TEMP[5].y, CONST[1][34].yyyy >103: LG2 TEMP[5].z, CONST[1][34].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV OUT[4], IN[2] >130: MOV OUT[3], TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 283 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 
436) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 540) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 588) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 600) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 608) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 612) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 616) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, 
%80 > %96 = fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, %142 > %145 = fmul float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = 
%main_body > %190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] > %213 = fsub float %58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, 
float 0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 = select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 = call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 
%316, float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, float %335, float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL CONST[1][0..21] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 272, 336, 288} >IMM[1] UINT32 {304, 256, 320, 
0} >IMM[2] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} >IMM[3] INT32 {0, 0, 0, 0} > 0: MUL TEMP[0].x, CONST[1][17].wwww, CONST[1][21].wwww > 1: MOV TEMP[0].z, TEMP[0].xxxx > 2: MUL TEMP[1].x, CONST[1][18].xxxx, CONST[1][21].wwww > 3: MOV TEMP[0].w, TEMP[1].xxxx > 4: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][21].xyyy, TEMP[0].zwww > 5: FMA TEMP[1].x, IN[0].xxxx, CONST[1][19].xxxx, CONST[1][16].wwww > 6: FMA TEMP[2].x, IN[0].yyyy, CONST[1][19].yyyy, CONST[1][17].xxxx > 7: MOV TEMP[1].y, TEMP[2].xxxx > 8: FMA TEMP[2].xy, CONST[1][21].wwww, CONST[1][17].yzzz, TEMP[1].xyyy > 9: MOV TEMP[2].xy, TEMP[2].xyyy > 10: TEX TEMP[2].yw, TEMP[2], SAMP[0], 2D > 11: FMA TEMP[2].xy, TEMP[2].ywww, IMM[2].xxxx, IMM[2].yyyy > 12: MOV TEMP[1].x, TEMP[2].xyxx > 13: MOV TEMP[1].z, -TEMP[2].yyyy > 14: MUL TEMP[2].xy, TEMP[1].xzzz, CONST[1][20].xyyy > 15: FMA TEMP[1].xy, TEMP[1].xzzz, CONST[1][20].xyyy, IN[0].xyyy > 16: MOV TEMP[3].xy, TEMP[1].xyyy > 17: TEX TEMP[3], TEMP[3], SAMP[1], 2D > 18: FMA TEMP[0].xy, TEMP[2].xyyy, CONST[1][21].xyyy, TEMP[0].xyyy > 19: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[1][16].xyzz > 20: MUL TEMP[2].x, TEMP[3].wwww, CONST[1][18].yyyy > 21: MOV TEMP[1].w, TEMP[2].xxxx > 22: MOV TEMP[2].xy, TEMP[0].xyyy > 23: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 24: MUL TEMP[0], TEMP[2], TEMP[1] > 25: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xyzz > 26: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 27: MUL TEMP[1].x, TEMP[0].wwww, IN[3].wwww > 28: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].wwww > 29: MOV TEMP[1].w, TEMP[1].xxxx > 30: MOV TEMP[2].x, IMM[3].xxxx > 31: MOV TEMP[2].w, IMM[0].xxxx > 32: TXF TEMP[2].x, TEMP[2], SAMP[3], BUFFER > 33: MUL TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz > 34: MOV OUT[0], TEMP[1] > 35: END >radeonsi: Compiling shader 284 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 
340) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) > %42 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 > %44 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 3 > %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 > %47 = extractelement <8 x i32> %43, i32 7 > %48 = extractelement <4 x i32> %46, i32 0 > %49 = and i32 %48, %47 > %50 = insertelement <4 x i32> %46, i32 %49, i32 0 > %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 > %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 7 > %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 > %56 = extractelement <8 x i32> %52, i32 7 > %57 = extractelement <4 x i32> %55, i32 0 > %58 = and i32 %57, %56 > %59 = insertelement <4 x i32> %55, i32 %58, i32 0 > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 11 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %70 = bitcast <8 x i32> addrspace(2)* %69 to <2 x i128> addrspace(2)* > %71 = load <2 x i128>, <2 x i128> addrspace(2)* %70, align 32, !tbaa !0 > %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %80 = fmul float %32, %41 > %81 = fmul float %33, %41 > %82 = call float @llvm.fma.f32(float %72, float %39, float %80) > %83 = call float @llvm.fma.f32(float %73, float %40, float %81) > %84 = call float @llvm.fma.f32(float %72, float %35, float %28) > %85 = call float @llvm.fma.f32(float %73, float %36, float %29) > %86 = call float @llvm.fma.f32(float %41, float %30, float %84) > %87 = call float @llvm.fma.f32(float %41, float %31, float %85) > %88 = bitcast float %86 to i32 > %89 = bitcast float %87 to i32 > %90 = insertelement <2 x i32> undef, i32 %88, i32 0 > %91 = insertelement <2 x i32> %90, i32 %89, i32 1 > %92 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %91, <8 x i32> %43, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %93 = extractelement <4 x float> %92, i32 1 > %94 = extractelement <4 x float> %92, i32 3 > %95 = call float @llvm.fma.f32(float %93, float 2.000000e+00, float -1.000000e+00) > 
%96 = call float @llvm.fma.f32(float %94, float 2.000000e+00, float -1.000000e+00) > %97 = fsub float -0.000000e+00, %96 > %98 = fmul float %95, %37 > %99 = fmul float %38, %97 > %100 = call float @llvm.fma.f32(float %95, float %37, float %72) > %101 = call float @llvm.fma.f32(float %97, float %38, float %73) > %102 = bitcast float %100 to i32 > %103 = bitcast float %101 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %52, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = extractelement <4 x float> %106, i32 3 > %111 = call float @llvm.fma.f32(float %98, float %39, float %82) > %112 = call float @llvm.fma.f32(float %99, float %40, float %83) > %113 = fmul float %107, %25 > %114 = fmul float %108, %26 > %115 = fmul float %109, %27 > %116 = fmul float %110, %34 > %117 = bitcast float %111 to i32 > %118 = bitcast float %112 to i32 > %119 = insertelement <2 x i32> undef, i32 %117, i32 0 > %120 = insertelement <2 x i32> %119, i32 %118, i32 1 > %121 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %120, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %122 = extractelement <4 x float> %121, i32 0 > %123 = extractelement <4 x float> %121, i32 1 > %124 = extractelement <4 x float> %121, i32 2 > %125 = extractelement <4 x float> %121, i32 3 > %126 = fmul float %122, %113 > %127 = fmul float %123, %114 > %128 = fmul float %124, %115 > %129 = fmul float %125, %116 > %130 = fmul float %126, %74 > %131 = fmul float %127, %75 > %132 = fmul float %128, %76 > %133 = fmul float %130, %78 > %134 = fmul float %131, %78 > %135 = fmul float %132, %78 > %136 = fmul float %129, %79 > %137 = fmul float %136, %77 > %138 = extractelement <2 x i128> %71, i32 1 > %139 = bitcast i128 %138 to <16 x i8> > %140 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %139, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 0 > %142 = fmul float %141, %133 > %143 = fmul float %141, %134 > %144 = fmul float %141, %135 > %145 = bitcast float %5 to i32 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %145, 10 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %142, 11 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %143, 12 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %144, 13 > %150 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149, float %137, 14 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %150, float %21, 24 > ret <{ i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], BUFFER, FLOAT >DCL CONST[1][0..21] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 304, 256, 272} >IMM[1] UINT32 {336, 320, 0, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} >IMM[3] INT32 {0, 0, 0, 0} > 0: FMA TEMP[0].x, IN[0].xxxx, CONST[1][19].xxxx, CONST[1][16].wwww > 1: FMA TEMP[1].x, IN[0].yyyy, CONST[1][19].yyyy, CONST[1][17].xxxx > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: FMA TEMP[0].xy, CONST[1][21].wwww, CONST[1][17].yzzz, TEMP[0].xyyy > 4: MOV TEMP[1].xy, TEMP[0].xyyy > 5: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 6: FMA TEMP[1].xy, TEMP[1].ywww, IMM[2].xxxx, IMM[2].yyyy > 7: MOV TEMP[0].x, TEMP[1].xyxx > 8: MOV TEMP[0].z, -TEMP[1].yyyy > 9: FMA TEMP[0].xy, TEMP[0].xzzz, CONST[1][20].xyyy, IN[0].xyyy > 10: MOV TEMP[1].xy, TEMP[0].xyyy > 11: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 12: MUL TEMP[2], IN[1], IN[3] > 13: MUL TEMP[0], TEMP[1], TEMP[2] > 14: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[2].wwww, IN[2].xyzz > 15: MUL TEMP[0].x, TEMP[0].wwww, IN[2].wwww > 16: MOV TEMP[0].w, TEMP[0].xxxx > 17: MOV TEMP[2].x, IMM[3].xxxx > 18: MOV TEMP[2].w, IMM[0].xxxx > 19: TXF TEMP[2].x, TEMP[2], SAMP[2], BUFFER > 20: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 21: MOV OUT[0], TEMP[0] > 22: END >radeonsi: Compiling shader 285 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 
272) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) > %34 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 > %36 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 3 > %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 > %39 = extractelement <8 x i32> %35, i32 7 > %40 = extractelement <4 x i32> %38, i32 0 > %41 = and i32 %40, %39 > %42 = insertelement <4 x i32> %38, i32 %41, i32 0 > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %53 = bitcast <8 x i32> addrspace(2)* %52 to <2 x i128> addrspace(2)* > %54 = load <2 x i128>, <2 x i128> addrspace(2)* %53, align 32, !tbaa !0 > %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %69 = call float @llvm.fma.f32(float %55, float %29, float %25) > %70 = call float @llvm.fma.f32(float %56, float %30, float %26) > %71 = call float @llvm.fma.f32(float %33, float %27, float %69) > %72 = call float @llvm.fma.f32(float %33, float %28, float %70) > %73 = bitcast float %71 to i32 > %74 = bitcast float %72 to i32 > %75 = insertelement <2 x i32> undef, i32 %73, i32 0 > %76 = insertelement <2 x i32> %75, i32 %74, i32 1 > %77 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %76, <8 x i32> %35, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %78 = extractelement <4 x float> %77, i32 1 > %79 = extractelement <4 x float> %77, i32 3 > %80 = call float 
@llvm.fma.f32(float %78, float 2.000000e+00, float -1.000000e+00) > %81 = call float @llvm.fma.f32(float %79, float 2.000000e+00, float -1.000000e+00) > %82 = fsub float -0.000000e+00, %81 > %83 = call float @llvm.fma.f32(float %80, float %31, float %55) > %84 = call float @llvm.fma.f32(float %82, float %32, float %56) > %85 = bitcast float %83 to i32 > %86 = bitcast float %84 to i32 > %87 = insertelement <2 x i32> undef, i32 %85, i32 0 > %88 = insertelement <2 x i32> %87, i32 %86, i32 1 > %89 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %88, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = extractelement <4 x float> %89, i32 2 > %93 = extractelement <4 x float> %89, i32 3 > %94 = fmul float %57, %65 > %95 = fmul float %58, %66 > %96 = fmul float %59, %67 > %97 = fmul float %60, %68 > %98 = fmul float %90, %94 > %99 = fmul float %91, %95 > %100 = fmul float %92, %96 > %101 = fmul float %93, %97 > %102 = call float @llvm.fma.f32(float %98, float %64, float %61) > %103 = call float @llvm.fma.f32(float %99, float %64, float %62) > %104 = call float @llvm.fma.f32(float %100, float %64, float %63) > %105 = fmul float %101, %64 > %106 = extractelement <2 x i128> %54, i32 1 > %107 = bitcast i128 %106 to <16 x i8> > %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 0) > %109 = extractelement <4 x float> %108, i32 0 > %110 = fmul float %109, %102 > %111 = fmul float %109, %103 > %112 = fmul float %109, %104 > %113 = bitcast float %5 to i32 > %114 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %113, 10 > %115 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %114, float %110, 11 > %116 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %115, float %111, 12 > %117 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %116, float %112, 13 > %118 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %117, float %105, 14 > %119 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %118, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %119 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) 
#1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL OUT[8], GENERIC[7] >DCL CONST[1][0..43] >DCL TEMP[0..14], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 416, 432, 448} >IMM[2] UINT32 {464, 496, 592, 672} >IMM[3] UINT32 {640, 656, 480, 512} >IMM[4] UINT32 {624, 544, 528, 608} >IMM[5] FLT32 { 0.0597, -1.5000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {560, 576, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][26], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][27], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][28], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][29], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][31], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][37].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[2].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][42].xyzz > 14: MUL TEMP[6].xyz, CONST[1][40].xyzz, CONST[1][41].xyzz > 15: MOV TEMP[6].w, CONST[1][40].wwww > 16: ABS TEMP[7].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, TEMP[7].xxxx, IMM[0].yyyy > 18: MIN TEMP[7].x, TEMP[1].xxxx, IMM[0].xxxx > 19: ADD TEMP[1].x, -TEMP[7].xxxx, IMM[0].xxxx > 20: DP3 TEMP[7].x, CONST[1][30].xyzz, TEMP[5].xyzz > 21: DP3 TEMP[8].x, CONST[1][32].xyzz, TEMP[5].xyzz > 22: MOV TEMP[7].z, TEMP[8].xxxx > 23: DP3 TEMP[5].x, CONST[1][31].xyzz, TEMP[5].xyzz > 24: MOV TEMP[7].y, TEMP[5].xxxx > 25: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz > 26: RSQ TEMP[8].x, TEMP[8].xxxx > 27: MUL TEMP[9].xyz, TEMP[8].xxxx, TEMP[7].xyzz > 28: MOV TEMP[10].xyz, TEMP[7].xyzx > 29: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[8].xxxx, IMM[0].zzzz > 30: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 31: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 32: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 33: DP3 TEMP[8].x, -TEMP[9].xyzz, CONST[1][39].xyzz > 34: FMA TEMP[11].x, -CONST[1][34].yyyy, TEMP[8].xxxx, CONST[1][34].xxxx > 35: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx, IMM[0].xxxx > 36: MOV TEMP[0].z, TEMP[8].xxxx > 37: ABS TEMP[8].x, TEMP[11].xxxx > 38: LG2 TEMP[8].x, TEMP[8].xxxx > 39: MOV TEMP[0].w, TEMP[8].xxxx > 40: MUL TEMP[8].xy, TEMP[0].zwww, IMM[5].xyyy > 41: EX2 TEMP[11].x, TEMP[8].yyyy > 42: FMA TEMP[12].x, CONST[1][34].zzzz, TEMP[11].xxxx, -CONST[1][33].zzzz > 43: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][34].zzzz > 44: MAX TEMP[12].x, TEMP[12].xxxx, IMM[0].wwww > 45: FMA TEMP[11].x, -TEMP[12].xxxx, TEMP[1].xxxx, TEMP[11].xxxx > 46: MAX TEMP[11].x, TEMP[11].xxxx, CONST[1][38].wwww > 47: FSNE TEMP[12].x, CONST[1][33].xxxx, IMM[0].wwww > 48: UIF TEMP[12].xxxx :0 > 49: RCP TEMP[12].x, CONST[1][33].xxxx > 50: MUL TEMP[12].x, -TEMP[0].xxxx, TEMP[12].xxxx > 51: ELSE :0 > 52: SSG TEMP[13].x, -TEMP[0].xxxx > 53: MUL TEMP[12].x, IMM[5].zzzz, TEMP[13].xxxx > 54: ENDIF > 55: MUL TEMP[1].x, TEMP[12].xxxx, IMM[5].wwww > 56: EX2 TEMP[12].x, TEMP[1].xxxx > 57: ADD TEMP[1].x, TEMP[12].xxxx, CONST[1][34].wwww > 58: 
MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][35].yyyy > 59: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 60: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 61: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][33].wwww > 62: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][35].xxxx > 63: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[11].xxxx > 64: FSNE TEMP[12].x, CONST[1][36].wwww, IMM[0].wwww > 65: UIF TEMP[12].xxxx :0 > 66: RCP TEMP[12].x, CONST[1][36].wwww > 67: MUL TEMP[12].x, -TEMP[0].xxxx, TEMP[12].xxxx > 68: ELSE :0 > 69: SSG TEMP[13].x, -TEMP[0].xxxx > 70: MUL TEMP[12].x, IMM[5].zzzz, TEMP[13].xxxx > 71: ENDIF > 72: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][37].zzzz > 73: FSNE TEMP[13].x, CONST[1][33].yyyy, IMM[0].wwww > 74: UIF TEMP[13].xxxx :0 > 75: RCP TEMP[13].x, CONST[1][33].yyyy > 76: MUL TEMP[13].x, TEMP[0].xxxx, TEMP[13].xxxx > 77: ELSE :0 > 78: SSG TEMP[14].x, TEMP[0].xxxx > 79: MUL TEMP[13].x, IMM[5].zzzz, TEMP[14].xxxx > 80: ENDIF > 81: MUL TEMP[1].x, TEMP[12].xxxx, IMM[5].wwww > 82: EX2 TEMP[12].x, TEMP[1].xxxx > 83: MUL TEMP[7].xyz, TEMP[12].xxxx, CONST[1][36].xyzz > 84: FMA TEMP[5].xyz, CONST[1][36].xyzz, TEMP[12].xxxx, TEMP[5].xxxx > 85: FMA TEMP[8].xyz, TEMP[7].xyzz, TEMP[8].xxxx, TEMP[11].xxxx > 86: FSEQ TEMP[11].xyz, TEMP[5].xyzz, IMM[0].wwww > 87: SSG TEMP[12].xyz, TEMP[8].xyzz > 88: MUL TEMP[12].xyz, IMM[5].zzzz, TEMP[12].xyzz > 89: RCP TEMP[14].x, TEMP[5].xxxx > 90: RCP TEMP[14].y, TEMP[5].yyyy > 91: RCP TEMP[14].z, TEMP[5].zzzz > 92: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[14].xyzz > 93: UCMP TEMP[8].xyz, TEMP[11].xyzz, TEMP[12].xyzz, TEMP[8].xyzz > 94: MUL TEMP[7].xyz, TEMP[13].xxxx, -TEMP[5].xyzz > 95: ABS TEMP[2].xyz, TEMP[2].xxxx > 96: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 97: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 98: EX2 TEMP[2].x, TEMP[1].xxxx > 99: EX2 TEMP[2].y, TEMP[1].yyyy >100: EX2 TEMP[2].z, TEMP[1].zzzz >101: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[5].wwww >102: LG2 TEMP[5].x, CONST[1][38].xxxx >103: LG2 TEMP[5].y, CONST[1][38].yyyy >104: LG2 TEMP[5].z, CONST[1][38].zzzz >105: MUL TEMP[9].xyz, TEMP[5].xyzz, IMM[7].yyyy >106: EX2 TEMP[5].x, TEMP[9].xxxx >107: EX2 TEMP[5].y, TEMP[9].yyyy >108: EX2 TEMP[5].z, TEMP[9].zzzz >109: EX2 TEMP[9].x, TEMP[7].xxxx >110: EX2 TEMP[9].y, TEMP[7].yyyy >111: EX2 TEMP[9].z, TEMP[7].zzzz >112: MUL TEMP[7].xyz, TEMP[9].xyzz, TEMP[5].xyzz >113: MUL TEMP[0].xyz, TEMP[8].xyzz, TEMP[7].xyzz >114: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >115: MOV TEMP[2].w, TEMP[2].xxxx >116: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >117: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >118: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >119: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >120: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >121: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[8].yyyy >122: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww >123: SSG TEMP[7].xyz, TEMP[1].xyzz >124: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >125: RCP TEMP[8].x, TEMP[0].xxxx >126: RCP TEMP[8].y, TEMP[0].yyyy >127: RCP TEMP[8].z, TEMP[0].zzzz >128: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >129: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >130: DP3 TEMP[0].x, CONST[1][30].xyzz, IN[3].xyzz >131: DP3 TEMP[1].x, CONST[1][30].xyzz, IN[4].xyzz >132: MOV TEMP[0].y, TEMP[1].xxxx >133: DP3 TEMP[1].x, CONST[1][30].xyzz, IN[1].xyzz >134: MOV TEMP[0].z, TEMP[1].xxxx >135: DP3 TEMP[1].x, CONST[1][31].xyzz, IN[3].xyzz >136: DP3 TEMP[5].x, CONST[1][31].xyzz, IN[4].xyzz >137: MOV TEMP[1].y, TEMP[5].xxxx >138: DP3 TEMP[5].x, CONST[1][31].xyzz, IN[1].xyzz 
>139: MOV TEMP[1].z, TEMP[5].xxxx >140: DP3 TEMP[5].x, CONST[1][32].xyzz, IN[3].xyzz >141: DP3 TEMP[7].x, CONST[1][32].xyzz, IN[4].xyzz >142: MOV TEMP[5].y, TEMP[7].xxxx >143: DP3 TEMP[7].x, CONST[1][32].xyzz, IN[1].xyzz >144: MOV TEMP[5].z, TEMP[7].xxxx >145: MOV OUT[8], IN[5] >146: MOV OUT[7], TEMP[5] >147: MOV OUT[6], TEMP[1] >148: MOV OUT[5], TEMP[0] >149: MOV OUT[3], TEMP[2] >150: MOV OUT[4], TEMP[10] >151: MOV OUT[2], TEMP[6] >152: MOV OUT[1], TEMP[4] >153: MOV OUT[0], TEMP[3] >154: END >radeonsi: Compiling shader 286 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 416) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 420) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 424) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 428) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 432) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 440) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 444) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 448) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 452) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 456) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 460) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 464) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 468) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 472) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 476) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 480) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 484) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 488) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 496) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 500) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 504) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 508) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 512) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 516) > %46 = call float @llvm.SI.load.const(<16 x i8> %20, i32 520) > %47 = call float @llvm.SI.load.const(<16 x i8> %20, i32 528) > %48 = call float @llvm.SI.load.const(<16 x i8> %20, i32 532) > %49 = call float @llvm.SI.load.const(<16 x i8> %20, i32 536) > %50 = call float @llvm.SI.load.const(<16 x i8> %20, i32 540) > %51 = call float @llvm.SI.load.const(<16 x i8> %20, i32 544) > %52 = call float @llvm.SI.load.const(<16 x i8> %20, i32 548) > %53 = call float @llvm.SI.load.const(<16 x i8> %20, i32 552) > %54 = call float @llvm.SI.load.const(<16 x i8> %20, i32 556) > %55 = call float @llvm.SI.load.const(<16 x i8> %20, i32 560) > %56 = call float @llvm.SI.load.const(<16 x i8> %20, i32 564) > %57 = call float @llvm.SI.load.const(<16 x i8> %20, i32 576) > %58 = call float @llvm.SI.load.const(<16 x i8> %20, i32 
580) > %59 = call float @llvm.SI.load.const(<16 x i8> %20, i32 584) > %60 = call float @llvm.SI.load.const(<16 x i8> %20, i32 588) > %61 = call float @llvm.SI.load.const(<16 x i8> %20, i32 600) > %62 = call float @llvm.SI.load.const(<16 x i8> %20, i32 604) > %63 = call float @llvm.SI.load.const(<16 x i8> %20, i32 608) > %64 = call float @llvm.SI.load.const(<16 x i8> %20, i32 612) > %65 = call float @llvm.SI.load.const(<16 x i8> %20, i32 616) > %66 = call float @llvm.SI.load.const(<16 x i8> %20, i32 620) > %67 = call float @llvm.SI.load.const(<16 x i8> %20, i32 624) > %68 = call float @llvm.SI.load.const(<16 x i8> %20, i32 628) > %69 = call float @llvm.SI.load.const(<16 x i8> %20, i32 632) > %70 = call float @llvm.SI.load.const(<16 x i8> %20, i32 640) > %71 = call float @llvm.SI.load.const(<16 x i8> %20, i32 644) > %72 = call float @llvm.SI.load.const(<16 x i8> %20, i32 648) > %73 = call float @llvm.SI.load.const(<16 x i8> %20, i32 652) > %74 = call float @llvm.SI.load.const(<16 x i8> %20, i32 656) > %75 = call float @llvm.SI.load.const(<16 x i8> %20, i32 660) > %76 = call float @llvm.SI.load.const(<16 x i8> %20, i32 664) > %77 = call float @llvm.SI.load.const(<16 x i8> %20, i32 672) > %78 = call float @llvm.SI.load.const(<16 x i8> %20, i32 676) > %79 = call float @llvm.SI.load.const(<16 x i8> %20, i32 680) > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %13) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 > %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %14) > %89 = extractelement <4 x float> %88, i32 0 > %90 = extractelement <4 x float> %88, i32 1 > %91 = extractelement <4 x float> %88, i32 2 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %15) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 > %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %16) > %100 = extractelement <4 x float> %99, i32 0 > %101 = extractelement <4 x float> %99, i32 1 > %102 = extractelement <4 x float> %99, i32 2 > %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 > %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %17) > %106 = extractelement <4 x float> %105, i32 0 > %107 = extractelement <4 x float> %105, i32 1 > %108 = extractelement <4 x float> %105, i32 2 > %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 > %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %18) > %112 = extractelement <4 x float> %111, i32 0 > %113 = extractelement <4 x float> %111, i32 1 > %114 = extractelement <4 x 
float> %111, i32 2 > %115 = extractelement <4 x float> %111, i32 3 > %116 = fmul float %21, %83 > %117 = fmul float %22, %84 > %118 = fadd float %116, %117 > %119 = fmul float %23, %85 > %120 = fadd float %118, %119 > %121 = fadd float %120, %24 > %122 = fmul float %25, %83 > %123 = fmul float %26, %84 > %124 = fadd float %122, %123 > %125 = fmul float %27, %85 > %126 = fadd float %124, %125 > %127 = fadd float %126, %28 > %128 = fmul float %29, %83 > %129 = fmul float %30, %84 > %130 = fadd float %128, %129 > %131 = fmul float %31, %85 > %132 = fadd float %130, %131 > %133 = fadd float %132, %32 > %134 = fmul float %33, %83 > %135 = fmul float %34, %84 > %136 = fadd float %134, %135 > %137 = fmul float %35, %85 > %138 = fadd float %136, %137 > %139 = fadd float %138, %36 > %140 = fmul float %40, %83 > %141 = fmul float %41, %84 > %142 = fadd float %140, %141 > %143 = fmul float %42, %85 > %144 = fadd float %142, %143 > %145 = fadd float %144, %43 > %146 = fadd float %145, %62 > %147 = fsub float %77, %83 > %148 = fsub float %78, %84 > %149 = fsub float %79, %85 > %150 = fmul float %70, %74 > %151 = fmul float %71, %75 > %152 = fmul float %72, %76 > %153 = call float @llvm.fabs.f32(float %139) > %154 = fmul float %153, 0x3EF4F8B580000000 > %155 = call float @llvm.minnum.f32(float %154, float 1.000000e+00) > %156 = fsub float 1.000000e+00, %155 > %157 = fmul float %37, %147 > %158 = fmul float %38, %148 > %159 = fadd float %158, %157 > %160 = fmul float %39, %149 > %161 = fadd float %159, %160 > %162 = fmul float %44, %147 > %163 = fmul float %45, %148 > %164 = fadd float %163, %162 > %165 = fmul float %46, %149 > %166 = fadd float %164, %165 > %167 = fmul float %40, %147 > %168 = fmul float %41, %148 > %169 = fadd float %168, %167 > %170 = fmul float %42, %149 > %171 = fadd float %169, %170 > %172 = fmul float %161, %161 > %173 = fmul float %171, %171 > %174 = fadd float %173, %172 > %175 = fmul float %166, %166 > %176 = fadd float %174, %175 > %177 = call float @llvm.AMDGPU.rsq.clamped.f32(float %176) > %178 = fmul float %177, %161 > %179 = fmul float %177, %171 > %180 = fmul float %177, %166 > %181 = fsub float -0.000000e+00, %171 > %182 = call float @llvm.fma.f32(float %181, float %177, float 0xBFC3333340000000) > %183 = fsub float 1.000000e+00, %182 > %184 = call float @llvm.AMDGPU.clamp.(float %183, float 0.000000e+00, float 1.000000e+00) > %185 = fmul float %184, %184 > %186 = fmul float %178, %67 > %187 = fsub float -0.000000e+00, %186 > %188 = fmul float %179, %68 > %189 = fsub float %187, %188 > %190 = fmul float %180, %69 > %191 = fsub float %189, %190 > %192 = fsub float -0.000000e+00, %52 > %193 = call float @llvm.fma.f32(float %192, float %191, float %51) > %194 = call float @llvm.fma.f32(float %191, float %191, float 1.000000e+00) > %195 = call float @llvm.fabs.f32(float %193) > %196 = call float @llvm.log2.f32(float %195) > %197 = fmul float %194, 0x3FAE8EC8A0000000 > %198 = fmul float %196, -1.500000e+00 > %199 = call float @llvm.exp2.f32(float %198) > %200 = fsub float -0.000000e+00, %49 > %201 = call float @llvm.fma.f32(float %53, float %199, float %200) > %202 = fmul float %199, %53 > %203 = call float @llvm.maxnum.f32(float %201, float 0.000000e+00) > %204 = fsub float -0.000000e+00, %203 > %205 = call float @llvm.fma.f32(float %204, float %156, float %202) > %206 = call float @llvm.maxnum.f32(float %205, float %66) > %207 = fcmp une float %47, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %47 > %209 = 
fmul float %146, %208 > %210 = fsub float -0.000000e+00, %209 > br label %ENDIF > >ELSE: ; preds = %main_body > %211 = fsub float -0.000000e+00, %146 > %212 = fcmp olt float %146, -0.000000e+00 > %213 = select i1 %212, float 1.000000e+00, float %211 > %214 = fcmp oge float %213, 0.000000e+00 > %.op = fmul float %213, 0x4600000000000000 > %215 = select i1 %214, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp48.0 = phi float [ %210, %IF ], [ %215, %ELSE ] > %216 = fmul float %temp48.0, 0x3FF7154760000000 > %217 = call float @llvm.exp2.f32(float %216) > %218 = fadd float %217, %54 > %219 = fmul float %218, %56 > %220 = fmul float %219, 5.000000e-01 > %221 = fmul float %185, %220 > %222 = call float @llvm.minnum.f32(float %221, float %50) > %223 = call float @llvm.maxnum.f32(float %222, float %55) > %224 = fmul float %223, %206 > %225 = fcmp une float %60, 0.000000e+00 > br i1 %225, label %IF61, label %ELSE62 > >IF61: ; preds = %ENDIF > %226 = fdiv float 1.000000e+00, %60 > %227 = fmul float %146, %226 > %228 = fsub float -0.000000e+00, %227 > br label %ENDIF60 > >ELSE62: ; preds = %ENDIF > %229 = fsub float -0.000000e+00, %146 > %230 = fcmp olt float %146, -0.000000e+00 > %231 = select i1 %230, float 1.000000e+00, float %229 > %232 = fcmp oge float %231, 0.000000e+00 > %.op66 = fmul float %231, 0x4600000000000000 > %233 = select i1 %232, float %.op66, float 0xC600000000000000 > br label %ENDIF60 > >ENDIF60: ; preds = %ELSE62, %IF61 > %temp48.1 = phi float [ %228, %IF61 ], [ %233, %ELSE62 ] > %234 = fsub float %61, %146 > %235 = fcmp une float %48, 0.000000e+00 > br i1 %235, label %IF64, label %ELSE65 > >IF64: ; preds = %ENDIF60 > %236 = fdiv float 1.000000e+00, %48 > %237 = fmul float %234, %236 > br label %ENDIF63 > >ELSE65: ; preds = %ENDIF60 > %238 = fcmp ogt float %234, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %234 > %240 = fcmp oge float %239, 0.000000e+00 > %.op67 = fmul float %239, 0x4600000000000000 > %241 = select i1 %240, float %.op67, float 0xC600000000000000 > br label %ENDIF63 > >ENDIF63: ; preds = %ELSE65, %IF64 > %temp52.0 = phi float [ %237, %IF64 ], [ %241, %ELSE65 ] > %242 = fmul float %temp48.1, 0x3FF7154760000000 > %243 = call float @llvm.exp2.f32(float %242) > %244 = fmul float %243, %57 > %245 = fmul float %243, %58 > %246 = fmul float %243, %59 > %247 = call float @llvm.fma.f32(float %57, float %243, float %223) > %248 = call float @llvm.fma.f32(float %58, float %243, float %223) > %249 = call float @llvm.fma.f32(float %59, float %243, float %223) > %250 = call float @llvm.fma.f32(float %244, float %197, float %224) > %251 = call float @llvm.fma.f32(float %245, float %197, float %224) > %252 = call float @llvm.fma.f32(float %246, float %197, float %224) > %253 = fcmp oeq float %247, 0.000000e+00 > %254 = fcmp oeq float %248, 0.000000e+00 > %255 = fcmp oeq float %249, 0.000000e+00 > %256 = fcmp ogt float %250, 0.000000e+00 > %257 = select i1 %256, float 1.000000e+00, float %250 > %258 = fcmp oge float %257, 0.000000e+00 > %259 = fcmp ogt float %251, 0.000000e+00 > %260 = select i1 %259, float 1.000000e+00, float %251 > %261 = fcmp oge float %260, 0.000000e+00 > %262 = fcmp ogt float %252, 0.000000e+00 > %263 = select i1 %262, float 1.000000e+00, float %252 > %264 = fcmp oge float %263, 0.000000e+00 > %.op68 = fmul float %257, 0x4600000000000000 > %265 = select i1 %258, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %260, 0x4600000000000000 > %266 = select i1 %261, float %.op69, float 
0xC600000000000000 > %.op70 = fmul float %263, 0x4600000000000000 > %267 = select i1 %264, float %.op70, float 0xC600000000000000 > %268 = fdiv float 1.000000e+00, %247 > %269 = fdiv float 1.000000e+00, %248 > %270 = fdiv float 1.000000e+00, %249 > %271 = fmul float %250, %268 > %272 = fmul float %251, %269 > %273 = fmul float %252, %270 > %274 = select i1 %253, float %265, float %271 > %275 = select i1 %254, float %266, float %272 > %276 = select i1 %255, float %267, float %273 > %277 = fmul float %247, %temp52.0 > %278 = fmul float %248, %temp52.0 > %279 = fmul float %249, %temp52.0 > %280 = call float @llvm.fabs.f32(float %139) > %281 = call float @llvm.fabs.f32(float %139) > %282 = call float @llvm.fabs.f32(float %139) > %283 = fmul float %247, %280 > %284 = fmul float %248, %281 > %285 = fmul float %249, %282 > %286 = fmul float %283, 0xBFF7154760000000 > %287 = fmul float %284, 0xBFF7154760000000 > %288 = fmul float %285, 0xBFF7154760000000 > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %287) > %291 = call float @llvm.exp2.f32(float %288) > %292 = fmul float %277, 0xBFF7154760000000 > %293 = fmul float %278, 0xBFF7154760000000 > %294 = fmul float %279, 0xBFF7154760000000 > %295 = call float @llvm.log2.f32(float %63) > %296 = call float @llvm.log2.f32(float %64) > %297 = call float @llvm.log2.f32(float %65) > %298 = fmul float %295, 0x3FDD1745E0000000 > %299 = fmul float %296, 0x3FDD1745E0000000 > %300 = fmul float %297, 0x3FDD1745E0000000 > %301 = call float @llvm.exp2.f32(float %298) > %302 = call float @llvm.exp2.f32(float %299) > %303 = call float @llvm.exp2.f32(float %300) > %304 = call float @llvm.exp2.f32(float %292) > %305 = call float @llvm.exp2.f32(float %293) > %306 = call float @llvm.exp2.f32(float %294) > %307 = fmul float %304, %301 > %308 = fmul float %305, %302 > %309 = fmul float %306, %303 > %310 = fmul float %274, %307 > %311 = fmul float %275, %308 > %312 = fmul float %276, %309 > %313 = fsub float 1.000000e+00, %289 > %314 = fsub float 1.000000e+00, %290 > %315 = fsub float 1.000000e+00, %291 > %316 = call float @llvm.fma.f32(float %310, float %313, float 0xBF70624DE0000000) > %317 = call float @llvm.fma.f32(float %311, float %314, float 0xBF70624DE0000000) > %318 = call float @llvm.fma.f32(float %312, float %315, float 0xBF70624DE0000000) > %319 = call float @llvm.maxnum.f32(float %316, float 0.000000e+00) > %320 = call float @llvm.maxnum.f32(float %317, float 0.000000e+00) > %321 = call float @llvm.maxnum.f32(float %318, float 0.000000e+00) > %322 = call float @llvm.fma.f32(float %319, float 0x4018CCCCC0000000, float 5.000000e-01) > %323 = call float @llvm.fma.f32(float %320, float 0x4018CCCCC0000000, float 5.000000e-01) > %324 = call float @llvm.fma.f32(float %321, float 0x4018CCCCC0000000, float 5.000000e-01) > %325 = fmul float %319, %322 > %326 = fmul float %320, %323 > %327 = fmul float %321, %324 > %328 = call float @llvm.fma.f32(float %319, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %329 = call float @llvm.fma.f32(float %320, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %330 = call float @llvm.fma.f32(float %321, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %331 = call float @llvm.fma.f32(float %319, float %328, float 0x3FAEB851E0000000) > %332 = call float @llvm.fma.f32(float %320, float %329, float 0x3FAEB851E0000000) > %333 = call float @llvm.fma.f32(float %321, float %330, float 0x3FAEB851E0000000) > %334 = fcmp oeq float %331, 0.000000e+00 > %335 = fcmp oeq float %332, 0.000000e+00 
> %336 = fcmp oeq float %333, 0.000000e+00 > %337 = fcmp ogt float %325, 0.000000e+00 > %338 = select i1 %337, float 1.000000e+00, float %325 > %339 = fcmp oge float %338, 0.000000e+00 > %340 = fcmp ogt float %326, 0.000000e+00 > %341 = select i1 %340, float 1.000000e+00, float %326 > %342 = fcmp oge float %341, 0.000000e+00 > %343 = fcmp ogt float %327, 0.000000e+00 > %344 = select i1 %343, float 1.000000e+00, float %327 > %345 = fcmp oge float %344, 0.000000e+00 > %.op71 = fmul float %338, 0x4600000000000000 > %346 = select i1 %339, float %.op71, float 0xC600000000000000 > %.op72 = fmul float %341, 0x4600000000000000 > %347 = select i1 %342, float %.op72, float 0xC600000000000000 > %.op73 = fmul float %344, 0x4600000000000000 > %348 = select i1 %345, float %.op73, float 0xC600000000000000 > %349 = fdiv float 1.000000e+00, %331 > %350 = fdiv float 1.000000e+00, %332 > %351 = fdiv float 1.000000e+00, %333 > %352 = fmul float %325, %349 > %353 = fmul float %326, %350 > %354 = fmul float %327, %351 > %355 = select i1 %334, float %346, float %352 > %356 = select i1 %335, float %347, float %353 > %357 = select i1 %336, float %348, float %354 > %358 = fmul float %37, %100 > %359 = fmul float %38, %101 > %360 = fadd float %359, %358 > %361 = fmul float %39, %102 > %362 = fadd float %360, %361 > %363 = fmul float %37, %106 > %364 = fmul float %38, %107 > %365 = fadd float %364, %363 > %366 = fmul float %39, %108 > %367 = fadd float %365, %366 > %368 = fmul float %37, %89 > %369 = fmul float %38, %90 > %370 = fadd float %369, %368 > %371 = fmul float %39, %91 > %372 = fadd float %370, %371 > %373 = fmul float %40, %100 > %374 = fmul float %41, %101 > %375 = fadd float %374, %373 > %376 = fmul float %42, %102 > %377 = fadd float %375, %376 > %378 = fmul float %40, %106 > %379 = fmul float %41, %107 > %380 = fadd float %379, %378 > %381 = fmul float %42, %108 > %382 = fadd float %380, %381 > %383 = fmul float %40, %89 > %384 = fmul float %41, %90 > %385 = fadd float %384, %383 > %386 = fmul float %42, %91 > %387 = fadd float %385, %386 > %388 = fmul float %44, %100 > %389 = fmul float %45, %101 > %390 = fadd float %389, %388 > %391 = fmul float %46, %102 > %392 = fadd float %390, %391 > %393 = fmul float %44, %106 > %394 = fmul float %45, %107 > %395 = fadd float %394, %393 > %396 = fmul float %46, %108 > %397 = fadd float %395, %396 > %398 = fmul float %44, %89 > %399 = fmul float %45, %90 > %400 = fadd float %399, %398 > %401 = fmul float %46, %91 > %402 = fadd float %400, %401 > %403 = bitcast i32 %11 to float > %404 = insertvalue <{ float, float, float }> undef, float %403, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %95, float %96, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %150, float %151, float %152, float %73) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %355, float %356, float %357, float %289) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %161, float %171, float %166, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %362, float %367, float %372, float %196) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %377, float %382, float %387, float %139) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %392, float %397, float %402, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %112, float %113, float %114, float %115) > call void @llvm.SI.export(i32 
15, i32 0, i32 1, i32 12, i32 0, float %121, float %127, float %133, float %139) > ret <{ float, float, float }> %404 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL IN[7], GENERIC[7], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], CUBE, FLOAT >DCL SVIEW[4], BUFFER, FLOAT >DCL CONST[1][0..25] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 400, 288, 384} >IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[2] UINT32 {304, 272, 0, 0} >IMM[3] INT32 {0, 0, 0, 0} > 0: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][25].xyyy, CONST[1][18].xyyy > 1: FMA TEMP[0].xy, CONST[1][25].wwww, CONST[1][18].zwww, TEMP[0].xyyy > 2: MOV TEMP[1].xy, TEMP[0].xyyy > 3: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 4: FMA TEMP[1].xy, TEMP[1].ywww, IMM[1].xxxx, IMM[1].yyyy > 5: MOV TEMP[0].x, TEMP[1].xyxx > 6: MOV TEMP[0].z, -TEMP[1].yyyy > 7: FMA TEMP[0].xy, TEMP[0].xzzz, CONST[1][24].xyyy, IN[0].xyyy > 8: MOV TEMP[1].xy, TEMP[0].xyyy > 9: TEX TEMP[1].yw, TEMP[1], SAMP[1], 2D > 10: MOV TEMP[2].xy, TEMP[0].xyyy > 11: TEX TEMP[2].w, TEMP[2], SAMP[2], 2D > 12: FMA TEMP[2].x, IN[7].wwww, IN[1].wwww, TEMP[2].wwww > 13: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].yyyy > 14: CEIL TEMP[2].x, TEMP[0].xxxx > 15: FMA TEMP[1].xy, TEMP[1].ywww, IMM[1].xxxx, IMM[1].yyyy > 16: MOV TEMP[3].xy, TEMP[1].xyxx > 17: FMA TEMP[4].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[1].zzzz > 18: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[4].xxxx > 19: SQRT TEMP[1].x, TEMP[1].xxxx > 20: MOV TEMP[3].z, TEMP[1].xxxx > 21: DP3 TEMP[1].x, IN[4].xyzz, TEMP[3].xyzz > 22: DP3 TEMP[4].x, IN[5].xyzz, TEMP[3].xyzz > 23: MOV TEMP[1].y, TEMP[4].xxxx > 24: DP3 TEMP[4].x, IN[6].xyzz, TEMP[3].xyzz > 25: MOV TEMP[1].z, TEMP[4].xxxx > 26: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz > 27: RSQ TEMP[4].x, TEMP[4].xxxx > 28: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[1].xyzz > 29: DP3 TEMP[3].x, IN[3].xyzz, 
IN[3].xyzz > 30: RSQ TEMP[5].x, TEMP[3].xxxx > 31: MUL TEMP[3].xyz, TEMP[5].xxxx, IN[3].xyzz > 32: DP3 TEMP[5].x, -TEMP[3].xyzz, TEMP[4].xyzz > 33: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 34: FMA TEMP[1].xyz, TEMP[4].xyzz, -TEMP[5].xxxx, -TEMP[3].xyzz > 35: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz > 36: ADD TEMP[4].x, TEMP[4].xxxx, -CONST[1][19].xxxx > 37: ABS TEMP[4].x, TEMP[4].xxxx > 38: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].zzzz > 39: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].wwww > 40: MOV TEMP[1].xyz, TEMP[1].xyzz > 41: TEX TEMP[1].xyz, TEMP[1], SAMP[3], CUBE > 42: MUL TEMP[3].xyz, TEMP[1].xyzz, CONST[1][17].xyzz > 43: FMA TEMP[1].xyz, TEMP[3].xyzz, IN[2].wwww, IN[2].xyzz > 44: MOV TEMP[3].x, IMM[3].xxxx > 45: MOV TEMP[3].w, IMM[0].xxxx > 46: TXF TEMP[3].x, TEMP[3], SAMP[4], BUFFER > 47: MUL TEMP[1].xyz, TEMP[3].xxxx, TEMP[1].xyzz > 48: MOV_SAT TEMP[3].x, CONST[1][19].yyyy > 49: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[2].xxxx > 50: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[0].xxxx > 51: MUL TEMP[0].x, TEMP[0].xxxx, IN[2].wwww > 52: MOV TEMP[1].w, TEMP[0].xxxx > 53: MOV OUT[0], TEMP[1] > 54: END >radeonsi: Compiling shader 287 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 3 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > 
%50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 7 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 > %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 > %59 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 11 > %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 > %62 = extractelement <8 x i32> %58, i32 7 > %63 = extractelement <4 x i32> %61, i32 0 > %64 = and i32 %63, %62 > %65 = insertelement <4 x i32> %61, i32 %64, i32 0 > %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 > %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 15 > %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 > %71 = extractelement <8 x i32> %67, i32 7 > %72 = extractelement <4 x i32> %70, i32 0 > %73 = and i32 %72, %71 > %74 = insertelement <4 x i32> %70, i32 %73, i32 0 > %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %76 = bitcast <8 x i32> addrspace(2)* %75 to <2 x i128> addrspace(2)* > %77 = load <2 x i128>, <2 x i128> addrspace(2)* %76, align 32, !tbaa !0 > %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %6, <2 x i32> %8) > %98 = call float @llvm.fma.f32(float %78, float %36, float %28) > %99 = call float @llvm.fma.f32(float %79, float %37, float %29) > %100 = call float @llvm.fma.f32(float %38, float %30, float %98) > %101 = call float @llvm.fma.f32(float %38, float %31, float %99) > %102 = bitcast float %100 to 
i32 > %103 = bitcast float %101 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 1 > %108 = extractelement <4 x float> %106, i32 3 > %109 = call float @llvm.fma.f32(float %107, float 2.000000e+00, float -1.000000e+00) > %110 = call float @llvm.fma.f32(float %108, float 2.000000e+00, float -1.000000e+00) > %111 = fsub float -0.000000e+00, %110 > %112 = call float @llvm.fma.f32(float %109, float %34, float %78) > %113 = call float @llvm.fma.f32(float %111, float %35, float %79) > %114 = bitcast float %112 to i32 > %115 = bitcast float %113 to i32 > %116 = insertelement <2 x i32> undef, i32 %114, i32 0 > %117 = insertelement <2 x i32> %116, i32 %115, i32 1 > %118 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %117, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %119 = extractelement <4 x float> %118, i32 1 > %120 = extractelement <4 x float> %118, i32 3 > %121 = bitcast float %112 to i32 > %122 = bitcast float %113 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %58, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 3 > %127 = call float @llvm.fma.f32(float %97, float %80, float %126) > %128 = fadd float %127, -1.000000e+00 > %129 = call float @llvm.ceil.f32(float %128) > %130 = call float @llvm.fma.f32(float %119, float 2.000000e+00, float -1.000000e+00) > %131 = call float @llvm.fma.f32(float %120, float 2.000000e+00, float -1.000000e+00) > %132 = fsub float -0.000000e+00, %130 > %133 = call float @llvm.fma.f32(float %132, float %130, float 1.000000e+00) > %134 = fsub float -0.000000e+00, %131 > %135 = call float @llvm.fma.f32(float %134, float %131, float %133) > %136 = call float @llvm.sqrt.f32(float %135) > %137 = fmul float %88, %130 > %138 = fmul float %89, %131 > %139 = fadd float %138, %137 > %140 = fmul float %90, %136 > %141 = fadd float %139, %140 > %142 = fmul float %91, %130 > %143 = fmul float %92, %131 > %144 = fadd float %143, %142 > %145 = fmul float %93, %136 > %146 = fadd float %144, %145 > %147 = fmul float %94, %130 > %148 = fmul float %95, %131 > %149 = fadd float %148, %147 > %150 = fmul float %96, %136 > %151 = fadd float %149, %150 > %152 = fmul float %141, %141 > %153 = fmul float %146, %146 > %154 = fadd float %153, %152 > %155 = fmul float %151, %151 > %156 = fadd float %154, %155 > %157 = call float @llvm.AMDGPU.rsq.clamped.f32(float %156) > %158 = fmul float %157, %141 > %159 = fmul float %157, %146 > %160 = fmul float %157, %151 > %161 = fmul float %85, %85 > %162 = fmul float %86, %86 > %163 = fadd float %162, %161 > %164 = fmul float %87, %87 > %165 = fadd float %163, %164 > %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) > %167 = fmul float %166, %85 > %168 = fmul float %166, %86 > %169 = fmul float %166, %87 > %170 = fmul float %167, %158 > %171 = fsub float -0.000000e+00, %170 > %172 = fmul float %168, %159 > %173 = fsub float %171, %172 > %174 = fmul float %169, %160 > %175 = fsub float %173, %174 > %176 = fadd float %175, %175 > %177 = fsub float -0.000000e+00, %176 > %178 = fsub float -0.000000e+00, %167 > %179 
= call float @llvm.fma.f32(float %158, float %177, float %178) > %180 = fsub float -0.000000e+00, %176 > %181 = fsub float -0.000000e+00, %168 > %182 = call float @llvm.fma.f32(float %159, float %180, float %181) > %183 = fsub float -0.000000e+00, %176 > %184 = fsub float -0.000000e+00, %169 > %185 = call float @llvm.fma.f32(float %160, float %183, float %184) > %186 = fmul float %158, %167 > %187 = fmul float %159, %168 > %188 = fadd float %187, %186 > %189 = fmul float %160, %169 > %190 = fadd float %188, %189 > %191 = fsub float %190, %32 > %192 = call float @llvm.fabs.f32(float %191) > %193 = fsub float 1.000000e+00, %192 > %194 = call float @llvm.maxnum.f32(float %193, float 0.000000e+00) > %195 = insertelement <4 x float> undef, float %179, i32 0 > %196 = insertelement <4 x float> %195, float %182, i32 1 > %197 = insertelement <4 x float> %196, float %185, i32 2 > %198 = shufflevector <4 x float> %197, <4 x float> %118, <4 x i32> <i32 0, i32 1, i32 2, i32 7> > %199 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %198) > %200 = extractelement <4 x float> %199, i32 0 > %201 = extractelement <4 x float> %199, i32 1 > %202 = extractelement <4 x float> %199, i32 2 > %203 = call float @llvm.fabs.f32(float %202) > %204 = fdiv float 1.000000e+00, %203 > %205 = fmul float %200, %204 > %206 = fadd float %205, 1.500000e+00 > %207 = fmul float %201, %204 > %208 = fadd float %207, 1.500000e+00 > %209 = bitcast float %208 to i32 > %210 = bitcast float %206 to i32 > %bc = bitcast <4 x float> %199 to <4 x i32> > %211 = insertelement <4 x i32> undef, i32 %209, i32 0 > %212 = insertelement <4 x i32> %211, i32 %210, i32 1 > %213 = shufflevector <4 x i32> %212, <4 x i32> %bc, <4 x i32> <i32 0, i32 1, i32 7, i32 undef> > %214 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %213, <8 x i32> %67, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %215 = extractelement <4 x float> %214, i32 0 > %216 = extractelement <4 x float> %214, i32 1 > %217 = extractelement <4 x float> %214, i32 2 > %218 = fmul float %215, %25 > %219 = fmul float %216, %26 > %220 = fmul float %217, %27 > %221 = call float @llvm.fma.f32(float %218, float %84, float %81) > %222 = call float @llvm.fma.f32(float %219, float %84, float %82) > %223 = call float @llvm.fma.f32(float %220, float %84, float %83) > %224 = extractelement <2 x i128> %77, i32 1 > %225 = bitcast i128 %224 to <16 x i8> > %226 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %225, i32 0, i32 0) > %227 = extractelement <4 x float> %226, i32 0 > %228 = fmul float %227, %221 > %229 = fmul float %227, %222 > %230 = fmul float %227, %223 > %231 = call float @llvm.AMDGPU.clamp.(float %33, float 0.000000e+00, float 1.000000e+00) > %232 = fmul float %231, %129 > %233 = fmul float %194, %232 > %234 = fmul float %233, %84 > %235 = bitcast float %5 to i32 > %236 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %235, 10 > %237 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %236, float %228, 11 > %238 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %237, float %229, 12 > %239 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> %238, float %230, 13 > %240 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %239, float %234, 14 > %241 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %240, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %241 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: readnone >declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..43] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 416, 432, 448} >IMM[2] UINT32 {464, 496, 592, 672} >IMM[3] UINT32 {640, 656, 480, 512} >IMM[4] UINT32 {624, 544, 528, 608} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {560, 576, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][26], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][27], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][28], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][29], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][31], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][37].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][42].xyzz > 14: MUL TEMP[6].xyz, CONST[1][40].xyzz, CONST[1][41].xyzz > 15: MOV TEMP[6].w, CONST[1][40].wwww > 16: DP3 TEMP[1].x, 
CONST[1][30].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][32].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][31].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][39].xyzz > 29: FMA TEMP[9].x, -CONST[1][34].yyyy, TEMP[7].xxxx, CONST[1][34].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][34].zzzz, TEMP[9].xxxx, -CONST[1][33].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][34].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][38].wwww > 46: FSNE TEMP[10].x, CONST[1][33].xxxx, IMM[5].xxxx > 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][33].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][34].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][35].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][33].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][35].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE TEMP[10].x, CONST[1][36].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][36].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][37].zzzz > 72: FSNE TEMP[11].x, CONST[1][33].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][33].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][36].xyzz > 83: FMA TEMP[5].xyz, CONST[1][36].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL TEMP[1].xyz, 
TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][38].xxxx >102: LG2 TEMP[5].y, CONST[1][38].yyyy >103: LG2 TEMP[5].z, CONST[1][38].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV OUT[4], IN[2] >130: MOV OUT[3], TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 288 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %39 = 
call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 540) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 588) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 600) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 604) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 608) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 612) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 616) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 620) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 624) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 628) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 632) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 640) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 644) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 648) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 652) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 656) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 660) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 664) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 672) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 676) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 680) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, %80 > %96 = 
fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, %142 > %145 = fmul float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > 
%190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] > %213 = fsub float %58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, float 
0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 = select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 = call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 %316, 
float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, float %335, float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL CONST[1][0..25] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 400, 288, 384} >IMM[1] FLT32 { 2.0000, -1.0000, 
0.0000, 0.0000} >IMM[2] UINT32 {272, 256, 0, 0} >IMM[3] INT32 {0, 0, 0, 0} > 0: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][25].xyyy, CONST[1][18].xyyy > 1: FMA TEMP[0].xy, CONST[1][25].wwww, CONST[1][18].zwww, TEMP[0].xyyy > 2: MOV TEMP[1].xy, TEMP[0].xyyy > 3: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 4: FMA TEMP[1].xy, TEMP[1].ywww, IMM[1].xxxx, IMM[1].yyyy > 5: MOV TEMP[0].x, TEMP[1].xyxx > 6: MOV TEMP[0].z, -TEMP[1].yyyy > 7: FMA TEMP[0].xy, TEMP[0].xzzz, CONST[1][24].xyyy, IN[0].xyyy > 8: MOV TEMP[1].xy, TEMP[0].xyyy > 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D > 10: MOV TEMP[2].xy, TEMP[0].xyyy > 11: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 12: FMA TEMP[1].x, IN[3].wwww, IN[1].wwww, TEMP[1].wwww > 13: ADD TEMP[0].x, TEMP[1].xxxx, IMM[1].yyyy > 14: CEIL TEMP[1].x, TEMP[0].xxxx > 15: MOV_SAT TEMP[3].x, CONST[1][17].wwww > 16: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx > 17: MUL TEMP[0].x, TEMP[2].wwww, TEMP[0].xxxx > 18: MUL TEMP[1].xyz, TEMP[2].xyzz, CONST[1][16].xyzz > 19: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz > 20: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xyzz > 21: FMA TEMP[1].xyz, TEMP[1].xyzz, IN[2].wwww, IN[2].xyzz > 22: MUL TEMP[0].x, TEMP[0].xxxx, IN[2].wwww > 23: MOV TEMP[0].w, TEMP[0].xxxx > 24: MOV TEMP[2].x, IMM[3].xxxx > 25: MOV TEMP[2].w, IMM[0].xxxx > 26: TXF TEMP[2].x, TEMP[2], SAMP[3], BUFFER > 27: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 28: MOV OUT[0], TEMP[0] > 29: END >radeonsi: Compiling shader 289 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 412) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > 
%45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 7 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 11 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %66 = bitcast <8 x i32> addrspace(2)* %65 to <2 x i128> addrspace(2)* > %67 = load <2 x i128>, <2 x i128> addrspace(2)* %66, align 32, !tbaa !0 > %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %82 = call float @llvm.fma.f32(float %68, float %35, float %29) > %83 = call float @llvm.fma.f32(float %69, float %36, float %30) > %84 = call float @llvm.fma.f32(float %37, float %31, float %82) > %85 = call float @llvm.fma.f32(float %37, float %32, float %83) > %86 = bitcast float %84 to i32 > %87 = bitcast float %85 to i32 > %88 = insertelement <2 x i32> undef, i32 %86, i32 0 > %89 = insertelement <2 x i32> %88, i32 %87, i32 1 > %90 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %89, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %91 = extractelement <4 x float> %90, i32 1 > %92 = extractelement <4 x float> %90, i32 3 > %93 = call float @llvm.fma.f32(float %91, float 2.000000e+00, float -1.000000e+00) > %94 = call float @llvm.fma.f32(float %92, float 2.000000e+00, float -1.000000e+00) > %95 = fsub float -0.000000e+00, %94 > %96 = call float @llvm.fma.f32(float %93, float %33, float %68) > %97 = call float @llvm.fma.f32(float %95, float %34, float %69) > %98 = bitcast float %96 to i32 > %99 = bitcast 
float %97 to i32 > %100 = insertelement <2 x i32> undef, i32 %98, i32 0 > %101 = insertelement <2 x i32> %100, i32 %99, i32 1 > %102 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %101, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %103 = extractelement <4 x float> %102, i32 3 > %104 = bitcast float %96 to i32 > %105 = bitcast float %97 to i32 > %106 = insertelement <2 x i32> undef, i32 %104, i32 0 > %107 = insertelement <2 x i32> %106, i32 %105, i32 1 > %108 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %107, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %109 = extractelement <4 x float> %108, i32 0 > %110 = extractelement <4 x float> %108, i32 1 > %111 = extractelement <4 x float> %108, i32 2 > %112 = extractelement <4 x float> %108, i32 3 > %113 = call float @llvm.fma.f32(float %81, float %73, float %103) > %114 = fadd float %113, -1.000000e+00 > %115 = call float @llvm.ceil.f32(float %114) > %116 = call float @llvm.AMDGPU.clamp.(float %28, float 0.000000e+00, float 1.000000e+00) > %117 = fmul float %116, %115 > %118 = fmul float %112, %117 > %119 = fmul float %109, %25 > %120 = fmul float %110, %26 > %121 = fmul float %111, %27 > %122 = fmul float %119, %78 > %123 = fmul float %120, %79 > %124 = fmul float %121, %80 > %125 = fmul float %122, %70 > %126 = fmul float %123, %71 > %127 = fmul float %124, %72 > %128 = call float @llvm.fma.f32(float %125, float %77, float %74) > %129 = call float @llvm.fma.f32(float %126, float %77, float %75) > %130 = call float @llvm.fma.f32(float %127, float %77, float %76) > %131 = fmul float %118, %77 > %132 = extractelement <2 x i128> %67, i32 1 > %133 = bitcast i128 %132 to <16 x i8> > %134 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %133, i32 0, i32 0) > %135 = extractelement <4 x float> %134, i32 0 > %136 = fmul float %135, %128 > %137 = fmul float %135, %129 > %138 = fmul float %135, %130 > %139 = bitcast float %5 to i32 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %139, 10 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %136, 11 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %137, 12 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %138, 13 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %131, 14 > %145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, 
i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..10] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {112, 128, 144, 160} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][0], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][1], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][2], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][3], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV TEMP[0].xy, IN[2].xyxx > 10: DP3 TEMP[2].x, CONST[1][7].xyzz, IN[3].xyzz > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[4].xyzz > 12: MOV TEMP[2].y, TEMP[3].xxxx > 13: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[1].xyzz > 14: MOV TEMP[2].z, TEMP[3].xxxx > 15: DP3 TEMP[3].x, CONST[1][8].xyzz, IN[3].xyzz > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[4].xyzz > 17: MOV TEMP[3].y, TEMP[4].xxxx > 18: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[1].xyzz > 19: MOV TEMP[3].z, TEMP[4].xxxx > 20: DP3 TEMP[4].x, CONST[1][9].xyzz, IN[3].xyzz > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[4].xyzz > 22: MOV TEMP[4].y, TEMP[5].xxxx > 23: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[1].xyzz > 24: MOV TEMP[4].z, TEMP[5].xxxx > 25: MUL TEMP[5], IN[5], CONST[1][10] > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[2] > 30: MOV OUT[1], TEMP[0] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 290 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 0) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 4) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 8) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 12) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 16) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 20) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 24) 
> %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 28) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 32) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 36) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 40) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 44) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 48) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 52) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 56) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 60) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %46 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %47 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %48 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %49 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %13) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %14) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %15) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %16) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %17) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %18) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = 
extractelement <4 x float> %81, i32 2 > %85 = extractelement <4 x float> %81, i32 3 > %86 = fmul float %21, %53 > %87 = fmul float %22, %54 > %88 = fadd float %86, %87 > %89 = fmul float %23, %55 > %90 = fadd float %88, %89 > %91 = fadd float %90, %24 > %92 = fmul float %25, %53 > %93 = fmul float %26, %54 > %94 = fadd float %92, %93 > %95 = fmul float %27, %55 > %96 = fadd float %94, %95 > %97 = fadd float %96, %28 > %98 = fmul float %29, %53 > %99 = fmul float %30, %54 > %100 = fadd float %98, %99 > %101 = fmul float %31, %55 > %102 = fadd float %100, %101 > %103 = fadd float %102, %32 > %104 = fmul float %33, %53 > %105 = fmul float %34, %54 > %106 = fadd float %104, %105 > %107 = fmul float %35, %55 > %108 = fadd float %106, %107 > %109 = fadd float %108, %36 > %110 = fmul float %37, %70 > %111 = fmul float %38, %71 > %112 = fadd float %111, %110 > %113 = fmul float %39, %72 > %114 = fadd float %112, %113 > %115 = fmul float %37, %76 > %116 = fmul float %38, %77 > %117 = fadd float %116, %115 > %118 = fmul float %39, %78 > %119 = fadd float %117, %118 > %120 = fmul float %37, %59 > %121 = fmul float %38, %60 > %122 = fadd float %121, %120 > %123 = fmul float %39, %61 > %124 = fadd float %122, %123 > %125 = fmul float %40, %70 > %126 = fmul float %41, %71 > %127 = fadd float %126, %125 > %128 = fmul float %42, %72 > %129 = fadd float %127, %128 > %130 = fmul float %40, %76 > %131 = fmul float %41, %77 > %132 = fadd float %131, %130 > %133 = fmul float %42, %78 > %134 = fadd float %132, %133 > %135 = fmul float %40, %59 > %136 = fmul float %41, %60 > %137 = fadd float %136, %135 > %138 = fmul float %42, %61 > %139 = fadd float %137, %138 > %140 = fmul float %43, %70 > %141 = fmul float %44, %71 > %142 = fadd float %141, %140 > %143 = fmul float %45, %72 > %144 = fadd float %142, %143 > %145 = fmul float %43, %76 > %146 = fmul float %44, %77 > %147 = fadd float %146, %145 > %148 = fmul float %45, %78 > %149 = fadd float %147, %148 > %150 = fmul float %43, %59 > %151 = fmul float %44, %60 > %152 = fadd float %151, %150 > %153 = fmul float %45, %61 > %154 = fadd float %152, %153 > %155 = fmul float %82, %46 > %156 = fmul float %83, %47 > %157 = fmul float %84, %48 > %158 = fmul float %85, %49 > %159 = bitcast i32 %11 to float > %160 = insertvalue <{ float, float, float }> undef, float %159, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %66, float %55, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %114, float %119, float %124, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %134, float %139, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %144, float %149, float %154, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %155, float %156, float %157, float %158) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %91, float %97, float %103, float %109) > ret <{ float, float, float }> %160 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 
0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..1] >DCL CONST[2][0..25] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {1, 352, 368, 0} >IMM[1] UINT32 {16, 384, 400, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 1.0010, 0.0000} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] FLT32 { 1.0000, 0.5000, 0.9610, 0.0000} > 0: FMA TEMP[0].x, IN[0].xxxx, CONST[2][22].xxxx, CONST[2][22].wwww > 1: FMA TEMP[1].x, IN[0].yyyy, CONST[2][22].yyyy, CONST[2][23].xxxx > 2: MOV TEMP[0].y, TEMP[1].xxxx > 3: FMA TEMP[0].xy, CONST[1][1].xxxx, CONST[2][23].yzzz, TEMP[0].xyyy > 4: MOV TEMP[1].xy, TEMP[0].xyyy > 5: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 6: FMA TEMP[1].xy, TEMP[1].ywww, IMM[2].xxxx, IMM[2].yyyy > 7: MOV TEMP[0].x, TEMP[1].xyxx > 8: MOV TEMP[0].z, -TEMP[1].yyyy > 9: FMA TEMP[0].xy, TEMP[0].xzzz, CONST[2][24].xyyy, IN[0].xyyy > 10: MOV TEMP[1].xy, TEMP[0].xyyy > 11: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 12: ADD TEMP[2].x, -IN[4].wwww, IMM[2].zzzz > 13: ADD TEMP[2].x, -TEMP[2].xxxx, TEMP[1].wwww > 14: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[4].xyzz > 15: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[2].wwww > 16: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 17: INEG TEMP[2].x, TEMP[2].xxxx > 18: USNE TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww > 19: AND TEMP[2].x, TEMP[2].xxxx, IMM[4].xxxx > 20: KILL_IF -TEMP[2].xxxx > 21: MOV TEMP[2].xy, TEMP[0].xyyy > 22: TEX TEMP[2].yw, TEMP[2], SAMP[2], 2D > 23: MOV TEMP[3].xy, TEMP[0].xyyy > 24: TEX TEMP[3], TEMP[3], SAMP[3], 2D > 25: FMA TEMP[2].xy, TEMP[2].ywww, IMM[2].xxxx, IMM[2].yyyy > 26: MOV TEMP[0].xy, TEMP[2].xyxx > 27: FMA TEMP[4].x, -TEMP[2].xxxx, TEMP[2].xxxx, IMM[4].xxxx > 28: FMA TEMP[2].x, -TEMP[2].yyyy, TEMP[2].yyyy, TEMP[4].xxxx > 29: SQRT TEMP[2].x, TEMP[2].xxxx > 30: MOV TEMP[0].z, TEMP[2].xxxx > 31: DP3 TEMP[2].x, IN[1].xyzz, TEMP[0].xyzz > 32: DP3 TEMP[4].x, IN[2].xyzz, TEMP[0].xyzz > 33: MOV TEMP[2].y, TEMP[4].xxxx > 34: DP3 TEMP[4].x, IN[3].xyzz, TEMP[0].xyzz > 35: MOV TEMP[2].z, TEMP[4].xxxx > 36: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz > 37: RSQ TEMP[4].x, TEMP[0].xxxx > 38: MUL TEMP[0].xyz, TEMP[4].xxxx, TEMP[2].xyzz > 39: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy, IMM[4].yyyy > 40: MOV TEMP[0].w, IMM[4].zzzz > 41: MOV TEMP[1].w, TEMP[3].wwww > 42: MUL TEMP[2].x, TEMP[3].zzzz, CONST[2][25].xxxx > 43: MOV TEMP[2].yz, TEMP[3].xyxx > 44: MOV TEMP[2].w, CONST[2][24].wwww > 45: MOV OUT[0], TEMP[0] > 46: MOV OUT[1], TEMP[1] > 47: MOV OUT[2], TEMP[2] > 48: END >radeonsi: Compiling shader 291 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, 
float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %26 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 364) > %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 368) > %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 372) > %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 376) > %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 396) > %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 400) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 7 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 11 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 > %67 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %68 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %67, i64 0, i64 15 > %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 > %70 = extractelement <8 x i32> %66, i32 7 > %71 = extractelement <4 x i32> %69, i32 0 > %72 = and i32 %71, %70 > %73 = insertelement <4 x i32> %69, i32 %72, i32 0 > %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> 
%8) > %77 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %89 = call float @llvm.fma.f32(float %74, float %28, float %30) > %90 = call float @llvm.fma.f32(float %75, float %29, float %31) > %91 = call float @llvm.fma.f32(float %25, float %32, float %89) > %92 = call float @llvm.fma.f32(float %25, float %33, float %90) > %93 = bitcast float %91 to i32 > %94 = bitcast float %92 to i32 > %95 = insertelement <2 x i32> undef, i32 %93, i32 0 > %96 = insertelement <2 x i32> %95, i32 %94, i32 1 > %97 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %96, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %98 = extractelement <4 x float> %97, i32 1 > %99 = extractelement <4 x float> %97, i32 3 > %100 = call float @llvm.fma.f32(float %98, float 2.000000e+00, float -1.000000e+00) > %101 = call float @llvm.fma.f32(float %99, float 2.000000e+00, float -1.000000e+00) > %102 = fsub float -0.000000e+00, %101 > %103 = call float @llvm.fma.f32(float %100, float %34, float %74) > %104 = call float @llvm.fma.f32(float %102, float %35, float %75) > %105 = bitcast float %103 to i32 > %106 = bitcast float %104 to i32 > %107 = insertelement <2 x i32> undef, i32 %105, i32 0 > %108 = insertelement <2 x i32> %107, i32 %106, i32 1 > %109 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %108, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %110 = extractelement <4 x float> %109, i32 0 > %111 = extractelement <4 x float> %109, i32 1 > %112 = extractelement <4 x float> %109, i32 2 > %113 = extractelement <4 x float> %109, i32 3 > %114 = fsub float 0x3FF00418A0000000, %88 > %115 = fsub float %113, %114 > %116 = fmul float %110, %85 > %117 = fmul float %111, %86 > %118 = fmul float %112, %87 > %119 = fcmp olt float %115, 0.000000e+00 > %120 = select i1 %119, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %120) > %121 = bitcast float %103 to i32 > %122 = bitcast float %104 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 1 > %127 = extractelement <4 x float> %125, i32 3 > %128 = bitcast float %103 to i32 > %129 = bitcast float %104 to i32 > %130 = insertelement <2 x i32> undef, i32 %128, i32 0 > %131 = insertelement <2 x i32> %130, i32 %129, i32 1 > %132 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %131, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %133 = 
extractelement <4 x float> %132, i32 0 > %134 = extractelement <4 x float> %132, i32 1 > %135 = extractelement <4 x float> %132, i32 2 > %136 = extractelement <4 x float> %132, i32 3 > %137 = call float @llvm.fma.f32(float %126, float 2.000000e+00, float -1.000000e+00) > %138 = call float @llvm.fma.f32(float %127, float 2.000000e+00, float -1.000000e+00) > %139 = fsub float -0.000000e+00, %137 > %140 = call float @llvm.fma.f32(float %139, float %137, float 1.000000e+00) > %141 = fsub float -0.000000e+00, %138 > %142 = call float @llvm.fma.f32(float %141, float %138, float %140) > %143 = call float @llvm.sqrt.f32(float %142) > %144 = fmul float %76, %137 > %145 = fmul float %77, %138 > %146 = fadd float %145, %144 > %147 = fmul float %78, %143 > %148 = fadd float %146, %147 > %149 = fmul float %79, %137 > %150 = fmul float %80, %138 > %151 = fadd float %150, %149 > %152 = fmul float %81, %143 > %153 = fadd float %151, %152 > %154 = fmul float %82, %137 > %155 = fmul float %83, %138 > %156 = fadd float %155, %154 > %157 = fmul float %84, %143 > %158 = fadd float %156, %157 > %159 = fmul float %148, %148 > %160 = fmul float %153, %153 > %161 = fadd float %160, %159 > %162 = fmul float %158, %158 > %163 = fadd float %161, %162 > %164 = call float @llvm.AMDGPU.rsq.clamped.f32(float %163) > %165 = fmul float %164, %148 > %166 = fmul float %164, %153 > %167 = fmul float %164, %158 > %168 = call float @llvm.fma.f32(float %165, float 5.000000e-01, float 5.000000e-01) > %169 = call float @llvm.fma.f32(float %166, float 5.000000e-01, float 5.000000e-01) > %170 = call float @llvm.fma.f32(float %167, float 5.000000e-01, float 5.000000e-01) > %171 = fmul float %135, %37 > %172 = bitcast float %5 to i32 > %173 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %172, 10 > %174 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %173, float %168, 11 > %175 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %174, float %169, 12 > %176 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %175, float %170, 13 > %177 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %176, float 0x3FEEC08320000000, 14 > %178 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %177, float %116, 15 > %179 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %178, float %117, 16 > %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %179, float %118, 17 > %181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %136, 18 > %182 = insertvalue <{ 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %171, 19 > %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %182, float %134, 20 > %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %133, 21 > %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float %36, 22 > %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..37] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 320, 336, 352} >IMM[2] UINT32 {368, 400, 496, 592} >IMM[3] UINT32 {576, 544, 560, 384} >IMM[4] UINT32 {416, 528, 448, 432} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {512, 464, 480, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][23], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][25], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][31].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][37].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][37].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][36].xyzz > 18: MUL TEMP[8].xyz, CONST[1][34].xyzz, CONST[1][35].xyzz > 19: MOV 
TEMP[8].w, CONST[1][34].wwww > 20: DP3 TEMP[1].x, CONST[1][24].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][26].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][25].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][33].xyzz > 33: FMA TEMP[10].x, -CONST[1][28].yyyy, TEMP[9].xxxx, CONST[1][28].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][28].zzzz, TEMP[10].xxxx, -CONST[1][27].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][28].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][32].wwww > 50: FSNE TEMP[11].x, CONST[1][27].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][27].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][28].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][29].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][27].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][29].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][30].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][30].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][31].zzzz > 76: FSNE TEMP[12].x, CONST[1][27].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][27].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][30].xyzz > 87: FMA TEMP[7].xyz, CONST[1][30].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL 
TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][32].xxxx >106: LG2 TEMP[7].y, CONST[1][32].yyyy >107: LG2 TEMP[7].z, CONST[1][32].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 292 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %37 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 400) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> 
%91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 
1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp 
oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 
0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 
>VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 293 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x 
i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][47] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][48] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][49] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][47] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][48] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][49] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], 
CONST[1][47] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][48] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][49] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, 
TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD 
TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 294 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 
x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr 
[8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float 
addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add 
nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw 
i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, 
align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > 
%627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = 
lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 
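; annotation: the "LLVM triggered Diagnostic Handler: LDS size exceeds device maximum"
; failures that follow in this dump line up with the LDS block these tessellation
; shaders address, @tess_lds = external addrspace(3) global [8320 x i32].
; A minimal sketch of the arithmetic, assuming the 32768-byte (32 KiB) per-work-group
; LDS limit that LLVM uses for SI-class GPUs (the limit itself is not printed here):
;     8320 dwords x 4 bytes = 33280 bytes  >  32768 bytes
; so the AMDGPU backend refuses to compile the shader, radeonsi logs
; "can't create a shader", and the tessellated geometry never gets drawn,
; which is consistent with the missing geometry reported in this bug.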
> %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][47] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][48] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][49] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV 
TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 295 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = 
sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 
addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, 
float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; 
Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { -0.3765, 0.0000, 1.0000, 2.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 352, 0, 0} >IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx > 3: MUL TEMP[2].xyz, TEMP[0].xyzz, IN[4].xyzz > 4: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy > 5: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 6: INEG TEMP[1].x, TEMP[1].xxxx > 7: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 8: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 9: KILL_IF -TEMP[1].xxxx > 10: MOV TEMP[1].xy, IN[0].xyyy > 11: TEX TEMP[1].yw, TEMP[1], SAMP[1], 2D > 12: FMA TEMP[1].xy, TEMP[1].ywww, IMM[0].wwww, IMM[3].xxxx > 13: MOV TEMP[0].xy, TEMP[1].xyxx > 14: FMA TEMP[3].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 15: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[3].xxxx > 16: SQRT TEMP[1].x, TEMP[1].xxxx > 17: MOV TEMP[0].z, TEMP[1].xxxx > 18: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 19: DP3 TEMP[3].x, IN[2].xyzz, TEMP[0].xyzz > 20: MOV TEMP[1].y, TEMP[3].xxxx > 21: DP3 TEMP[3].x, IN[3].xyzz, TEMP[0].xyzz > 22: MOV TEMP[1].z, TEMP[3].xxxx > 23: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 24: RSQ TEMP[3].x, TEMP[0].xxxx > 25: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[1].xyzz > 26: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[3].yyyy, IMM[3].yyyy > 27: MOV TEMP[0].w, CONST[1][22].zzzz > 28: MOV TEMP[1].xy, IN[0].xyyy > 29: TEX TEMP[1], TEMP[1], SAMP[2], 2D > 30: MOV TEMP[2].w, TEMP[1].wwww > 31: MUL TEMP[3].x, TEMP[1].zzzz, CONST[1][22].yyyy > 32: MOV TEMP[3].yz, TEMP[1].xyxx > 33: MOV TEMP[3].w, CONST[1][22].xxxx > 34: MOV OUT[0], TEMP[0] > 35: MOV OUT[1], TEMP[2] > 36: MOV OUT[2], TEMP[3] > 37: END >radeonsi: Compiling shader 296 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %38 = load <8 x i32>, <8 x i32> addrspace(2)* %37, align 32, !tbaa !0 > %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 7 > %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 > %42 = extractelement <8 x i32> %38, i32 7 > %43 = extractelement <4 x i32> %41, i32 0 > %44 = and i32 %43, %42 > %45 = insertelement <4 x i32> %41, i32 %44, i32 0 > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 11 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %69 = bitcast float %55 to i32 > %70 = bitcast float %56 to i32 > %71 = insertelement <2 x i32> undef, i32 %69, i32 0 > %72 = insertelement <2 x i32> %71, i32 %70, i32 1 > %73 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %72, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = 
extractelement <4 x float> %73, i32 3 > %78 = fadd float %77, 0xBFD8181820000000 > %79 = fmul float %74, %66 > %80 = fmul float %75, %67 > %81 = fmul float %76, %68 > %82 = fcmp olt float %78, 0.000000e+00 > %83 = select i1 %82, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %83) > %84 = bitcast float %55 to i32 > %85 = bitcast float %56 to i32 > %86 = insertelement <2 x i32> undef, i32 %84, i32 0 > %87 = insertelement <2 x i32> %86, i32 %85, i32 1 > %88 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %87, <8 x i32> %38, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %89 = extractelement <4 x float> %88, i32 1 > %90 = extractelement <4 x float> %88, i32 3 > %91 = call float @llvm.fma.f32(float %89, float 2.000000e+00, float -1.000000e+00) > %92 = call float @llvm.fma.f32(float %90, float 2.000000e+00, float -1.000000e+00) > %93 = fsub float -0.000000e+00, %91 > %94 = call float @llvm.fma.f32(float %93, float %91, float 1.000000e+00) > %95 = fsub float -0.000000e+00, %92 > %96 = call float @llvm.fma.f32(float %95, float %92, float %94) > %97 = call float @llvm.sqrt.f32(float %96) > %98 = fmul float %57, %91 > %99 = fmul float %58, %92 > %100 = fadd float %99, %98 > %101 = fmul float %59, %97 > %102 = fadd float %100, %101 > %103 = fmul float %60, %91 > %104 = fmul float %61, %92 > %105 = fadd float %104, %103 > %106 = fmul float %62, %97 > %107 = fadd float %105, %106 > %108 = fmul float %63, %91 > %109 = fmul float %64, %92 > %110 = fadd float %109, %108 > %111 = fmul float %65, %97 > %112 = fadd float %110, %111 > %113 = fmul float %102, %102 > %114 = fmul float %107, %107 > %115 = fadd float %114, %113 > %116 = fmul float %112, %112 > %117 = fadd float %115, %116 > %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) > %119 = fmul float %118, %102 > %120 = fmul float %118, %107 > %121 = fmul float %118, %112 > %122 = call float @llvm.fma.f32(float %119, float 5.000000e-01, float 5.000000e-01) > %123 = call float @llvm.fma.f32(float %120, float 5.000000e-01, float 5.000000e-01) > %124 = call float @llvm.fma.f32(float %121, float 5.000000e-01, float 5.000000e-01) > %125 = bitcast float %55 to i32 > %126 = bitcast float %56 to i32 > %127 = insertelement <2 x i32> undef, i32 %125, i32 0 > %128 = insertelement <2 x i32> %127, i32 %126, i32 1 > %129 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %128, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %130 = extractelement <4 x float> %129, i32 0 > %131 = extractelement <4 x float> %129, i32 1 > %132 = extractelement <4 x float> %129, i32 2 > %133 = extractelement <4 x float> %129, i32 3 > %134 = fmul float %132, %26 > %135 = bitcast float %5 to i32 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %135, 10 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %122, 11 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %123, 12 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float 
%124, 13 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %27, 14 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %79, 15 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %80, 16 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %81, 17 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %133, 18 > %145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %134, 19 > %146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %131, 20 > %147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %130, 21 > %148 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147, float %25, 22 > %149 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %148, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %149 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..34] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 256} >IMM[3] UINT32 {272, 288, 304, 528} >IMM[4] 
UINT32 {336, 432, 544, 496} >IMM[5] UINT32 {512, 320, 352, 464} >IMM[6] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[7] UINT32 {384, 368, 448, 400} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] UINT32 {416, 0, 0, 0} >IMM[10] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[11] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV 
TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, 
TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL 
TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR 
TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][16], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][17], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][18], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][19], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][33].xyzz >363: DP4 TEMP[5].x, CONST[1][21], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][27].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: MUL TEMP[8].xyz, CONST[1][31].xyzz, CONST[1][32].xyzz >368: MOV TEMP[8].w, CONST[1][31].wwww >369: ABS TEMP[9].x, TEMP[2].xxxx >370: MUL TEMP[0].x, TEMP[9].xxxx, IMM[6].xxxx >371: MIN TEMP[9].x, TEMP[0].xxxx, IMM[0].zzzz >372: ADD TEMP[0].x, -TEMP[9].xxxx, IMM[0].zzzz >373: DP3 TEMP[7].x, CONST[1][20].xyzz, TEMP[3].xyzz >374: DP3 TEMP[9].x, CONST[1][22].xyzz, TEMP[3].xyzz >375: MOV TEMP[7].z, TEMP[9].xxxx >376: DP3 TEMP[3].x, CONST[1][21].xyzz, TEMP[3].xyzz >377: MOV TEMP[7].y, TEMP[3].xxxx >378: DP3 TEMP[9].x, TEMP[7].xyzz, TEMP[7].xyzz >379: RSQ TEMP[9].x, TEMP[9].xxxx >380: MUL TEMP[10].xyz, TEMP[9].xxxx, TEMP[7].xyzz >381: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[9].xxxx, IMM[6].yyyy >382: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >383: MOV_SAT TEMP[3].x, TEMP[3].xxxx >384: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >385: DP3 TEMP[9].x, -TEMP[10].xyzz, CONST[1][29].xyzz >386: FMA TEMP[10].x, -CONST[1][24].yyyy, TEMP[9].xxxx, CONST[1][24].xxxx >387: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].zzzz >388: MUL TEMP[9].x, TEMP[9].xxxx, IMM[6].zzzz >389: ABS 
TEMP[10].x, TEMP[10].xxxx >390: LG2 TEMP[10].x, TEMP[10].xxxx >391: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].wwww >392: EX2 TEMP[10].x, TEMP[10].xxxx >393: FMA TEMP[11].x, CONST[1][24].zzzz, TEMP[10].xxxx, -CONST[1][23].zzzz >394: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][24].zzzz >395: MAX TEMP[11].x, TEMP[11].xxxx, IMM[8].xxxx >396: FMA TEMP[0].x, -TEMP[11].xxxx, TEMP[0].xxxx, TEMP[10].xxxx >397: MAX TEMP[10].x, TEMP[0].xxxx, CONST[1][28].wwww >398: FSNE TEMP[11].x, CONST[1][23].xxxx, IMM[8].xxxx >399: UIF TEMP[11].xxxx :0 >400: RCP TEMP[11].x, CONST[1][23].xxxx >401: MUL TEMP[11].x, -TEMP[1].xxxx, TEMP[11].xxxx >402: ELSE :0 >403: SSG TEMP[12].x, -TEMP[1].xxxx >404: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >405: ENDIF >406: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].zzzz >407: EX2 TEMP[11].x, TEMP[11].xxxx >408: ADD TEMP[11].x, TEMP[11].xxxx, CONST[1][24].wwww >409: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][25].yyyy >410: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].wwww >411: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[11].xxxx >412: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][23].wwww >413: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][25].xxxx >414: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[10].xxxx >415: FSNE TEMP[10].x, CONST[1][26].wwww, IMM[8].xxxx >416: UIF TEMP[10].xxxx :0 >417: RCP TEMP[10].x, CONST[1][26].wwww >418: MUL TEMP[10].x, -TEMP[1].xxxx, TEMP[10].xxxx >419: ELSE :0 >420: SSG TEMP[11].x, -TEMP[1].xxxx >421: MUL TEMP[10].x, IMM[8].yyyy, TEMP[11].xxxx >422: ENDIF >423: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][27].zzzz >424: FSNE TEMP[11].x, CONST[1][23].yyyy, IMM[8].xxxx >425: UIF TEMP[11].xxxx :0 >426: RCP TEMP[11].x, CONST[1][23].yyyy >427: MUL TEMP[11].x, TEMP[1].xxxx, TEMP[11].xxxx >428: ELSE :0 >429: SSG TEMP[12].x, TEMP[1].xxxx >430: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >431: ENDIF >432: MUL TEMP[10].x, TEMP[10].xxxx, IMM[8].zzzz >433: EX2 TEMP[10].x, TEMP[10].xxxx >434: MUL TEMP[7].xyz, TEMP[10].xxxx, CONST[1][26].xyzz >435: FMA TEMP[3].xyz, CONST[1][26].xyzz, TEMP[10].xxxx, TEMP[3].xxxx >436: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[9].xxxx, TEMP[0].xxxx >437: MUL TEMP[7].xyz, TEMP[11].xxxx, -TEMP[3].xyzz >438: ABS TEMP[2].xyz, TEMP[2].xxxx >439: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >440: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >441: EX2 TEMP[2].x, TEMP[1].xxxx >442: EX2 TEMP[2].y, TEMP[1].yyyy >443: EX2 TEMP[2].z, TEMP[1].zzzz >444: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[8].zzzz >445: LG2 TEMP[10].x, CONST[1][28].xxxx >446: LG2 TEMP[10].y, CONST[1][28].yyyy >447: LG2 TEMP[10].z, CONST[1][28].zzzz >448: MUL TEMP[4].xyz, TEMP[10].xyzz, IMM[10].xxxx >449: EX2 TEMP[10].x, TEMP[4].xxxx >450: EX2 TEMP[10].y, TEMP[4].yyyy >451: EX2 TEMP[10].z, TEMP[4].zzzz >452: EX2 TEMP[4].x, TEMP[7].xxxx >453: EX2 TEMP[4].y, TEMP[7].yyyy >454: EX2 TEMP[4].z, TEMP[7].zzzz >455: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[10].xyzz >456: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[8].xxxx >457: SSG TEMP[10].xyz, TEMP[9].xyzz >458: MUL TEMP[10].xyz, IMM[8].yyyy, TEMP[10].xyzz >459: RCP TEMP[11].x, TEMP[3].xxxx >460: RCP TEMP[11].y, TEMP[3].yyyy >461: RCP TEMP[11].z, TEMP[3].zzzz >462: MUL TEMP[3].xyz, TEMP[9].xyzz, TEMP[11].xyzz >463: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[10].xyzz, TEMP[3].xyzz >464: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >465: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >466: MOV TEMP[2].w, TEMP[2].xxxx >467: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[10].yyyy >468: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xxxx >469: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[8].wwww >470: MUL TEMP[1].xyz, TEMP[0].xyzz, 
TEMP[3].xyzz >471: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[10].wwww >472: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[11].xxxx >473: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[8].xxxx >474: SSG TEMP[4].xyz, TEMP[1].xyzz >475: MUL TEMP[4].xyz, IMM[8].yyyy, TEMP[4].xyzz >476: RCP TEMP[7].x, TEMP[0].xxxx >477: RCP TEMP[7].y, TEMP[0].yyyy >478: RCP TEMP[7].z, TEMP[0].zzzz >479: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >480: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >481: MOV OUT[4], IN[2] >482: MOV OUT[3], TEMP[2] >483: MOV OUT[2], TEMP[8] >484: MOV OUT[1], TEMP[6] >485: MOV OUT[0], TEMP[5] >486: END >radeonsi: Compiling shader 297 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 256) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 260) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 264) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 268) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 272) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 276) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 280) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 284) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 288) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 292) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 296) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 300) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 304) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 312) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 316) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 396) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, 
i32 404) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 428) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 472) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 496) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 500) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %13) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %14) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %15) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = extractelement <4 x float> %94, i32 2 > %98 = extractelement <4 x float> %94, i32 3 > %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 > %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %16) > %102 = extractelement <4 x float> %101, i32 0 > %103 = extractelement <4 x float> %101, i32 1 > %104 = extractelement <4 x float> %101, i32 2 > %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 > %107 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %17) > %108 = extractelement <4 x float> %107, i32 0 > %109 = extractelement <4 x float> %107, i32 1 > %110 = extractelement <4 x float> %107, i32 2 > %111 = fmul float %110, 
0x406FE01000000000 > %112 = fmul float %109, 0x406FE01000000000 > %113 = fmul float %108, 0x406FE01000000000 > %114 = fptosi float %111 to i32 > %115 = fptosi float %112 to i32 > %116 = fptosi float %113 to i32 > %117 = shl i32 %114, 1 > %118 = or i32 %117, 1 > %119 = shl i32 %115, 1 > %120 = or i32 %119, 1 > %121 = shl i32 %116, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %114, 5 > %124 = or i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %124) > %126 = fmul float %102, %125 > %127 = shl i32 %115, 5 > %128 = or i32 %127, 4 > %129 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %128) > %130 = fmul float %103, %129 > %131 = shl i32 %118, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %131) > %133 = shl i32 %118, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %118, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %138) > %140 = shl i32 %118, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %118, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %146) > %148 = shl i32 %118, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %102 > %153 = fmul float %144, %102 > %154 = fmul float %153, 2.000000e+00 > %155 = shl i32 %120, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %155) > %157 = shl i32 %120, 4 > %158 = or i32 %157, 12 > %159 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %158) > %160 = fmul float %156, %159 > %161 = shl i32 %120, 4 > %162 = or i32 %161, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %162) > %164 = shl i32 %120, 4 > %165 = or i32 %164, 8 > %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %165) > %167 = fsub float -0.000000e+00, %160 > %168 = call float @llvm.fma.f32(float %163, float %166, float %167) > %169 = shl i32 %120, 4 > %170 = or i32 %169, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %170) > %172 = shl i32 %120, 4 > %173 = or i32 %172, 8 > %174 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %160) > %176 = fmul float %175, %103 > %177 = fmul float %176, 2.000000e+00 > %178 = fmul float %168, %103 > %179 = fmul float %178, 2.000000e+00 > %180 = shl i32 %118, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %181) > %183 = shl i32 %118, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %184) > %186 = shl i32 %118, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %186) > %188 = shl i32 %118, 4 > %189 = or i32 %188, 12 > %190 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %189) > %191 = fmul float %185, %190 > %192 = fmul float %185, %187 > %193 = fmul float %182, %190 > %194 = shl i32 %118, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %194) > %196 = shl i32 %118, 4 > %197 = or i32 %196, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %197) > %199 = call float @llvm.fma.f32(float %195, float %198, float %191) > %200 = fmul float %199, %102 > %201 = fmul float %200, 2.000000e+00 > %202 = shl i32 %118, 4 > %203 = call float 
@llvm.SI.load.const(<16 x i8> %80, i32 %202) > %204 = shl i32 %118, 4 > %205 = or i32 %204, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %205) > %207 = shl i32 %118, 4 > %208 = or i32 %207, 8 > %209 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %208) > %210 = shl i32 %118, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %210) > %212 = shl i32 %118, 4 > %213 = or i32 %212, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %213) > %215 = shl i32 %118, 4 > %216 = or i32 %215, 8 > %217 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %216) > %218 = fmul float %203, %211 > %219 = fmul float %206, %214 > %220 = fmul float %209, %217 > %221 = fadd float %220, %219 > %222 = fadd float %220, %218 > %223 = fadd float %219, %218 > %224 = fsub float -0.000000e+00, %221 > %225 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fsub float -0.000000e+00, %222 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %223 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fmul float %102, %227 > %231 = shl i32 %120, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %232) > %234 = shl i32 %120, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %235) > %237 = shl i32 %120, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %237) > %239 = shl i32 %120, 4 > %240 = or i32 %239, 12 > %241 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %240) > %242 = fmul float %236, %241 > %243 = fmul float %236, %238 > %244 = fmul float %233, %241 > %245 = shl i32 %120, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %245) > %247 = shl i32 %120, 4 > %248 = or i32 %247, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %248) > %250 = call float @llvm.fma.f32(float %246, float %249, float %242) > %251 = fmul float %250, %103 > %252 = fmul float %251, 2.000000e+00 > %253 = shl i32 %120, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %253) > %255 = shl i32 %120, 4 > %256 = or i32 %255, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %256) > %258 = shl i32 %120, 4 > %259 = or i32 %258, 8 > %260 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %259) > %261 = shl i32 %120, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %261) > %263 = shl i32 %120, 4 > %264 = or i32 %263, 4 > %265 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %264) > %266 = shl i32 %120, 4 > %267 = or i32 %266, 8 > %268 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %267) > %269 = fmul float %254, %262 > %270 = fmul float %257, %265 > %271 = fmul float %260, %268 > %272 = fadd float %271, %270 > %273 = fadd float %271, %269 > %274 = fadd float %270, %269 > %275 = fsub float -0.000000e+00, %272 > %276 = call float @llvm.fma.f32(float %275, float 2.000000e+00, float 1.000000e+00) > %277 = fsub float -0.000000e+00, %273 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %274 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fmul float %103, %278 > %282 = fadd float %201, %252 > %283 = fadd float %230, %281 > %284 = fadd float %154, %179 > %285 = fadd float %126, %130 > %286 = shl i32 %116, 5 > %287 = or i32 %286, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %287) > %289 = fmul float %104, 
%288 > %290 = shl i32 %122, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %290) > %292 = shl i32 %122, 4 > %293 = or i32 %292, 12 > %294 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %293) > %295 = fmul float %291, %294 > %296 = shl i32 %122, 4 > %297 = or i32 %296, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %297) > %299 = shl i32 %122, 4 > %300 = or i32 %299, 8 > %301 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %300) > %302 = fsub float -0.000000e+00, %295 > %303 = call float @llvm.fma.f32(float %298, float %301, float %302) > %304 = shl i32 %122, 4 > %305 = or i32 %304, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %305) > %307 = shl i32 %122, 4 > %308 = or i32 %307, 8 > %309 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %295) > %311 = fmul float %310, %104 > %312 = fmul float %311, 2.000000e+00 > %313 = fmul float %303, %104 > %314 = fmul float %313, 2.000000e+00 > %315 = shl i32 %122, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %316) > %318 = shl i32 %122, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %319) > %321 = shl i32 %122, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %321) > %323 = shl i32 %122, 4 > %324 = or i32 %323, 12 > %325 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %324) > %326 = fmul float %320, %325 > %327 = fmul float %320, %322 > %328 = fmul float %317, %325 > %329 = shl i32 %122, 4 > %330 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %329) > %331 = shl i32 %122, 4 > %332 = or i32 %331, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %332) > %334 = call float @llvm.fma.f32(float %330, float %333, float %326) > %335 = fmul float %334, %104 > %336 = fmul float %335, 2.000000e+00 > %337 = shl i32 %122, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %337) > %339 = shl i32 %122, 4 > %340 = or i32 %339, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %340) > %342 = shl i32 %122, 4 > %343 = or i32 %342, 8 > %344 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %343) > %345 = shl i32 %122, 4 > %346 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %345) > %347 = shl i32 %122, 4 > %348 = or i32 %347, 4 > %349 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %348) > %350 = shl i32 %122, 4 > %351 = or i32 %350, 8 > %352 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %351) > %353 = fmul float %338, %346 > %354 = fmul float %341, %349 > %355 = fmul float %344, %352 > %356 = fadd float %355, %354 > %357 = fadd float %355, %353 > %358 = fadd float %354, %353 > %359 = fsub float -0.000000e+00, %356 > %360 = call float @llvm.fma.f32(float %359, float 2.000000e+00, float 1.000000e+00) > %361 = fsub float -0.000000e+00, %357 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %358 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fmul float %104, %362 > %366 = fadd float %282, %336 > %367 = fadd float %283, %365 > %368 = fadd float %284, %314 > %369 = fadd float %285, %289 > %370 = fmul float %366, %84 > %371 = fmul float %367, %85 > %372 = fadd float %370, %371 > %373 = fmul float %368, %86 > %374 = fadd float %372, %373 > %375 = fadd float %374, %369 > %376 = shl i32 %118, 4 > %377 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %376) > %378 = shl i32 %118, 4 > %379 = or 
i32 %378, 8 > %380 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %379) > %381 = fsub float -0.000000e+00, %193 > %382 = call float @llvm.fma.f32(float %377, float %380, float %381) > %383 = fmul float %382, %102 > %384 = fmul float %383, 2.000000e+00 > %385 = fmul float %152, 2.000000e+00 > %386 = shl i32 %120, 4 > %387 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %386) > %388 = shl i32 %120, 4 > %389 = or i32 %388, 8 > %390 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %389) > %391 = fsub float -0.000000e+00, %244 > %392 = call float @llvm.fma.f32(float %387, float %390, float %391) > %393 = fmul float %392, %103 > %394 = fmul float %393, 2.000000e+00 > %395 = fmul float %102, %229 > %396 = fmul float %102, %225 > %397 = fmul float %103, %280 > %398 = fmul float %103, %276 > %399 = shl i32 %114, 5 > %400 = or i32 %399, 8 > %401 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %400) > %402 = fmul float %102, %401 > %403 = shl i32 %115, 5 > %404 = or i32 %403, 8 > %405 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %404) > %406 = fmul float %103, %405 > %407 = fadd float %394, %384 > %408 = fadd float %177, %385 > %409 = fadd float %397, %395 > %410 = fadd float %406, %402 > %411 = shl i32 %122, 4 > %412 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %411) > %413 = shl i32 %122, 4 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %414) > %416 = fsub float -0.000000e+00, %328 > %417 = call float @llvm.fma.f32(float %412, float %415, float %416) > %418 = fmul float %417, %104 > %419 = fmul float %418, 2.000000e+00 > %420 = fmul float %104, %364 > %421 = fmul float %104, %360 > %422 = shl i32 %116, 5 > %423 = or i32 %422, 8 > %424 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %423) > %425 = fmul float %104, %424 > %426 = fadd float %407, %419 > %427 = fadd float %408, %312 > %428 = fadd float %409, %420 > %429 = fadd float %410, %425 > %430 = fmul float %426, %84 > %431 = fmul float %427, %85 > %432 = fadd float %430, %431 > %433 = fmul float %428, %86 > %434 = fadd float %432, %433 > %435 = fadd float %434, %429 > %436 = shl i32 %114, 5 > %437 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %436) > %438 = fmul float %102, %437 > %439 = shl i32 %115, 5 > %440 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %439) > %441 = fmul float %103, %440 > %442 = shl i32 %116, 5 > %443 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %442) > %444 = fmul float %104, %443 > %445 = shl i32 %118, 4 > %446 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %445) > %447 = shl i32 %118, 4 > %448 = or i32 %447, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %448) > %450 = fsub float -0.000000e+00, %191 > %451 = call float @llvm.fma.f32(float %446, float %449, float %450) > %452 = fadd float %193, %192 > %453 = fmul float %451, %102 > %454 = fmul float %452, %102 > %455 = fmul float %453, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = shl i32 %120, 4 > %458 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %457) > %459 = shl i32 %120, 4 > %460 = or i32 %459, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %460) > %462 = fsub float -0.000000e+00, %242 > %463 = call float @llvm.fma.f32(float %458, float %461, float %462) > %464 = shl i32 %122, 4 > %465 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %464) > %466 = shl i32 %122, 4 > %467 = or i32 %466, 4 > %468 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %467) > %469 = fsub float -0.000000e+00, %326 > %470 = call float 
@llvm.fma.f32(float %465, float %468, float %469) > %471 = fadd float %328, %327 > %472 = fmul float %463, %103 > %473 = fmul float %470, %104 > %474 = fmul float %471, %104 > %475 = fmul float %473, 2.000000e+00 > %476 = fmul float %474, 2.000000e+00 > %477 = fadd float %244, %243 > %478 = fmul float %477, %103 > %479 = fmul float %472, 2.000000e+00 > %480 = fmul float %478, 2.000000e+00 > %481 = fadd float %396, %398 > %482 = fadd float %455, %479 > %483 = fadd float %456, %480 > %484 = fadd float %438, %441 > %485 = fadd float %421, %481 > %486 = fadd float %475, %482 > %487 = fadd float %476, %483 > %488 = fadd float %444, %484 > %489 = fmul float %485, %84 > %490 = fmul float %486, %85 > %491 = fadd float %489, %490 > %492 = fmul float %487, %86 > %493 = fadd float %491, %492 > %494 = fadd float %493, %488 > %495 = fmul float %20, %494 > %496 = fmul float %21, %375 > %497 = fadd float %495, %496 > %498 = fmul float %22, %435 > %499 = fadd float %497, %498 > %500 = fadd float %499, %23 > %501 = fmul float %24, %494 > %502 = fmul float %25, %375 > %503 = fadd float %501, %502 > %504 = fmul float %26, %435 > %505 = fadd float %503, %504 > %506 = fadd float %505, %27 > %507 = fmul float %28, %494 > %508 = fmul float %29, %375 > %509 = fadd float %507, %508 > %510 = fmul float %30, %435 > %511 = fadd float %509, %510 > %512 = fadd float %511, %31 > %513 = fmul float %32, %494 > %514 = fmul float %33, %375 > %515 = fadd float %513, %514 > %516 = fmul float %34, %435 > %517 = fadd float %515, %516 > %518 = fadd float %517, %35 > %519 = fsub float %76, %494 > %520 = fsub float %77, %375 > %521 = fsub float %78, %435 > %522 = fmul float %39, %494 > %523 = fmul float %40, %375 > %524 = fadd float %522, %523 > %525 = fmul float %41, %435 > %526 = fadd float %524, %525 > %527 = fadd float %526, %42 > %528 = fadd float %527, %61 > %529 = fmul float %69, %73 > %530 = fmul float %70, %74 > %531 = fmul float %71, %75 > %532 = call float @llvm.fabs.f32(float %518) > %533 = fmul float %532, 0x3EF4F8B580000000 > %534 = call float @llvm.minnum.f32(float %533, float 1.000000e+00) > %535 = fsub float 1.000000e+00, %534 > %536 = fmul float %36, %519 > %537 = fmul float %37, %520 > %538 = fadd float %537, %536 > %539 = fmul float %38, %521 > %540 = fadd float %538, %539 > %541 = fmul float %43, %519 > %542 = fmul float %44, %520 > %543 = fadd float %542, %541 > %544 = fmul float %45, %521 > %545 = fadd float %543, %544 > %546 = fmul float %39, %519 > %547 = fmul float %40, %520 > %548 = fadd float %547, %546 > %549 = fmul float %41, %521 > %550 = fadd float %548, %549 > %551 = fmul float %540, %540 > %552 = fmul float %550, %550 > %553 = fadd float %552, %551 > %554 = fmul float %545, %545 > %555 = fadd float %553, %554 > %556 = call float @llvm.AMDGPU.rsq.clamped.f32(float %555) > %557 = fmul float %556, %540 > %558 = fmul float %556, %550 > %559 = fmul float %556, %545 > %560 = fsub float -0.000000e+00, %550 > %561 = call float @llvm.fma.f32(float %560, float %556, float 0xBFC3333340000000) > %562 = fsub float 1.000000e+00, %561 > %563 = call float @llvm.AMDGPU.clamp.(float %562, float 0.000000e+00, float 1.000000e+00) > %564 = fmul float %563, %563 > %565 = fmul float %557, %66 > %566 = fsub float -0.000000e+00, %565 > %567 = fmul float %558, %67 > %568 = fsub float %566, %567 > %569 = fmul float %559, %68 > %570 = fsub float %568, %569 > %571 = fsub float -0.000000e+00, %51 > %572 = call float @llvm.fma.f32(float %571, float %570, float %50) > %573 = call float @llvm.fma.f32(float %570, float %570, float 
1.000000e+00) > %574 = fmul float %573, 0x3FAE8EC8A0000000 > %575 = call float @llvm.fabs.f32(float %572) > %576 = call float @llvm.log2.f32(float %575) > %577 = fmul float %576, -1.500000e+00 > %578 = call float @llvm.exp2.f32(float %577) > %579 = fsub float -0.000000e+00, %48 > %580 = call float @llvm.fma.f32(float %52, float %578, float %579) > %581 = fmul float %578, %52 > %582 = call float @llvm.maxnum.f32(float %580, float 0.000000e+00) > %583 = fsub float -0.000000e+00, %582 > %584 = call float @llvm.fma.f32(float %583, float %535, float %581) > %585 = call float @llvm.maxnum.f32(float %584, float %65) > %586 = fcmp une float %46, 0.000000e+00 > br i1 %586, label %IF, label %ELSE > >IF: ; preds = %main_body > %587 = fdiv float 1.000000e+00, %46 > %588 = fmul float %528, %587 > %589 = fsub float -0.000000e+00, %588 > br label %ENDIF > >ELSE: ; preds = %main_body > %590 = fsub float -0.000000e+00, %528 > %591 = fcmp olt float %528, -0.000000e+00 > %592 = select i1 %591, float 1.000000e+00, float %590 > %593 = fcmp oge float %592, 0.000000e+00 > %.op = fmul float %592, 0x4600000000000000 > %594 = select i1 %593, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %589, %IF ], [ %594, %ELSE ] > %595 = fmul float %temp44.0, 0x3FF7154760000000 > %596 = call float @llvm.exp2.f32(float %595) > %597 = fadd float %596, %53 > %598 = fmul float %597, %55 > %599 = fmul float %598, 5.000000e-01 > %600 = fmul float %564, %599 > %601 = call float @llvm.minnum.f32(float %600, float %49) > %602 = call float @llvm.maxnum.f32(float %601, float %54) > %603 = fmul float %602, %585 > %604 = fcmp une float %59, 0.000000e+00 > br i1 %604, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %605 = fdiv float 1.000000e+00, %59 > %606 = fmul float %528, %605 > %607 = fsub float -0.000000e+00, %606 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %608 = fsub float -0.000000e+00, %528 > %609 = fcmp olt float %528, -0.000000e+00 > %610 = select i1 %609, float 1.000000e+00, float %608 > %611 = fcmp oge float %610, 0.000000e+00 > %.op164 = fmul float %610, 0x4600000000000000 > %612 = select i1 %611, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp40.0 = phi float [ %607, %IF159 ], [ %612, %ELSE160 ] > %613 = fsub float %60, %528 > %614 = fcmp une float %47, 0.000000e+00 > br i1 %614, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %615 = fdiv float 1.000000e+00, %47 > %616 = fmul float %613, %615 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %617 = fcmp ogt float %613, 0.000000e+00 > %618 = select i1 %617, float 1.000000e+00, float %613 > %619 = fcmp oge float %618, 0.000000e+00 > %.op165 = fmul float %618, 0x4600000000000000 > %620 = select i1 %619, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp44.1 = phi float [ %616, %IF162 ], [ %620, %ELSE163 ] > %621 = fmul float %temp40.0, 0x3FF7154760000000 > %622 = call float @llvm.exp2.f32(float %621) > %623 = fmul float %622, %56 > %624 = fmul float %622, %57 > %625 = fmul float %622, %58 > %626 = call float @llvm.fma.f32(float %56, float %622, float %602) > %627 = call float @llvm.fma.f32(float %57, float %622, float %602) > %628 = call float @llvm.fma.f32(float %58, float %622, float %602) > %629 = call float @llvm.fma.f32(float %623, float %574, float %603) > %630 = call float @llvm.fma.f32(float %624, float %574, float %603) > %631 = call float 
@llvm.fma.f32(float %625, float %574, float %603) > %632 = fmul float %626, %temp44.1 > %633 = fmul float %627, %temp44.1 > %634 = fmul float %628, %temp44.1 > %635 = call float @llvm.fabs.f32(float %518) > %636 = call float @llvm.fabs.f32(float %518) > %637 = call float @llvm.fabs.f32(float %518) > %638 = fmul float %626, %635 > %639 = fmul float %627, %636 > %640 = fmul float %628, %637 > %641 = fmul float %638, 0xBFF7154760000000 > %642 = fmul float %639, 0xBFF7154760000000 > %643 = fmul float %640, 0xBFF7154760000000 > %644 = call float @llvm.exp2.f32(float %641) > %645 = call float @llvm.exp2.f32(float %642) > %646 = call float @llvm.exp2.f32(float %643) > %647 = fmul float %632, 0xBFF7154760000000 > %648 = fmul float %633, 0xBFF7154760000000 > %649 = fmul float %634, 0xBFF7154760000000 > %650 = call float @llvm.log2.f32(float %62) > %651 = call float @llvm.log2.f32(float %63) > %652 = call float @llvm.log2.f32(float %64) > %653 = fmul float %650, 0x3FDD1745E0000000 > %654 = fmul float %651, 0x3FDD1745E0000000 > %655 = fmul float %652, 0x3FDD1745E0000000 > %656 = call float @llvm.exp2.f32(float %653) > %657 = call float @llvm.exp2.f32(float %654) > %658 = call float @llvm.exp2.f32(float %655) > %659 = call float @llvm.exp2.f32(float %647) > %660 = call float @llvm.exp2.f32(float %648) > %661 = call float @llvm.exp2.f32(float %649) > %662 = fmul float %659, %656 > %663 = fmul float %660, %657 > %664 = fmul float %661, %658 > %665 = fcmp oeq float %626, 0.000000e+00 > %666 = fcmp oeq float %627, 0.000000e+00 > %667 = fcmp oeq float %628, 0.000000e+00 > %668 = fcmp ogt float %629, 0.000000e+00 > %669 = select i1 %668, float 1.000000e+00, float %629 > %670 = fcmp oge float %669, 0.000000e+00 > %671 = fcmp ogt float %630, 0.000000e+00 > %672 = select i1 %671, float 1.000000e+00, float %630 > %673 = fcmp oge float %672, 0.000000e+00 > %674 = fcmp ogt float %631, 0.000000e+00 > %675 = select i1 %674, float 1.000000e+00, float %631 > %676 = fcmp oge float %675, 0.000000e+00 > %.op166 = fmul float %669, 0x4600000000000000 > %677 = select i1 %670, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %672, 0x4600000000000000 > %678 = select i1 %673, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %675, 0x4600000000000000 > %679 = select i1 %676, float %.op168, float 0xC600000000000000 > %680 = fdiv float 1.000000e+00, %626 > %681 = fdiv float 1.000000e+00, %627 > %682 = fdiv float 1.000000e+00, %628 > %683 = fmul float %629, %680 > %684 = fmul float %630, %681 > %685 = fmul float %631, %682 > %686 = select i1 %665, float %677, float %683 > %687 = select i1 %666, float %678, float %684 > %688 = select i1 %667, float %679, float %685 > %689 = fmul float %686, %662 > %690 = fmul float %687, %663 > %691 = fmul float %688, %664 > %692 = fsub float 1.000000e+00, %644 > %693 = fsub float 1.000000e+00, %645 > %694 = fsub float 1.000000e+00, %646 > %695 = call float @llvm.fma.f32(float %689, float %692, float 0xBF70624DE0000000) > %696 = call float @llvm.fma.f32(float %690, float %693, float 0xBF70624DE0000000) > %697 = call float @llvm.fma.f32(float %691, float %694, float 0xBF70624DE0000000) > %698 = call float @llvm.maxnum.f32(float %695, float 0.000000e+00) > %699 = call float @llvm.maxnum.f32(float %696, float 0.000000e+00) > %700 = call float @llvm.maxnum.f32(float %697, float 0.000000e+00) > %701 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 5.000000e-01) > %702 = call float @llvm.fma.f32(float %699, float 0x4018CCCCC0000000, float 5.000000e-01) > 
%703 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 5.000000e-01) > %704 = fmul float %698, %701 > %705 = fmul float %699, %702 > %706 = fmul float %700, %703 > %707 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %708 = call float @llvm.fma.f32(float %699, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %709 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %710 = call float @llvm.fma.f32(float %698, float %707, float 0x3FAEB851E0000000) > %711 = call float @llvm.fma.f32(float %699, float %708, float 0x3FAEB851E0000000) > %712 = call float @llvm.fma.f32(float %700, float %709, float 0x3FAEB851E0000000) > %713 = fcmp oeq float %710, 0.000000e+00 > %714 = fcmp oeq float %711, 0.000000e+00 > %715 = fcmp oeq float %712, 0.000000e+00 > %716 = fcmp ogt float %704, 0.000000e+00 > %717 = select i1 %716, float 1.000000e+00, float %704 > %718 = fcmp oge float %717, 0.000000e+00 > %719 = fcmp ogt float %705, 0.000000e+00 > %720 = select i1 %719, float 1.000000e+00, float %705 > %721 = fcmp oge float %720, 0.000000e+00 > %722 = fcmp ogt float %706, 0.000000e+00 > %723 = select i1 %722, float 1.000000e+00, float %706 > %724 = fcmp oge float %723, 0.000000e+00 > %.op169 = fmul float %717, 0x4600000000000000 > %725 = select i1 %718, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %720, 0x4600000000000000 > %726 = select i1 %721, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %723, 0x4600000000000000 > %727 = select i1 %724, float %.op171, float 0xC600000000000000 > %728 = fdiv float 1.000000e+00, %710 > %729 = fdiv float 1.000000e+00, %711 > %730 = fdiv float 1.000000e+00, %712 > %731 = fmul float %704, %728 > %732 = fmul float %705, %729 > %733 = fmul float %706, %730 > %734 = select i1 %713, float %725, float %731 > %735 = select i1 %714, float %726, float %732 > %736 = select i1 %715, float %727, float %733 > %737 = bitcast i32 %11 to float > %738 = insertvalue <{ float, float, float }> undef, float %737, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %90, float %91, float %424, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %529, float %530, float %531, float %72) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %734, float %735, float %736, float %644) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %96, float %97, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %500, float %506, float %512, float %518) > ret <{ float, float, float }> %738 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) 
#0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..30] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 480, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[3] > 3: MUL TEMP[0], TEMP[0], IN[1] > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 5: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 6: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][30].xyzz > 7: MOV TEMP[1].w, TEMP[1].xxxx > 8: MOV TEMP[1].xyz, TEMP[0].xyzx > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 298 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 484) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 488) > %28 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 > %30 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 3 > %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 > %33 = extractelement <8 x i32> %29, i32 7 > %34 = extractelement <4 x i32> %32, i32 0 > %35 = and i32 %34, %33 > %36 = insertelement <4 x i32> %32, i32 %35, i32 0 > %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 
%6, <2 x i32> %8) > %48 = bitcast float %37 to i32 > %49 = bitcast float %38 to i32 > %50 = insertelement <2 x i32> undef, i32 %48, i32 0 > %51 = insertelement <2 x i32> %50, i32 %49, i32 1 > %52 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %51, <8 x i32> %29, <4 x i32> %36, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = extractelement <4 x float> %52, i32 3 > %57 = fmul float %53, %44 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %39 > %62 = fmul float %58, %40 > %63 = fmul float %59, %41 > %64 = fmul float %60, %42 > %65 = fmul float %61, %43 > %66 = fmul float %62, %43 > %67 = fmul float %63, %43 > %68 = fmul float %64, %65 > %69 = fmul float %64, %66 > %70 = fmul float %64, %67 > %71 = fmul float %68, %25 > %72 = fmul float %69, %26 > %73 = fadd float %72, %71 > %74 = fmul float %70, %27 > %75 = fadd float %73, %74 > %76 = bitcast float %5 to i32 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %76, 10 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %68, 11 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %69, 12 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %70, 13 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %75, 14 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..22] >DCL CONST[2][0..24] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, -0.1500, 0.0597} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {128, 1, 288, 176} >IMM[3] UINT32 {112, 144, 320, 256} >IMM[4] FLT32 { -1.5000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[5] UINT32 {240, 304, 224, 272} >IMM[6] 
FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[7] UINT32 {384, 160, 352, 0} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][0], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][1], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][2], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][3], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][8], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[2][18].wwww > 11: MOV TEMP[1], TEMP[1] > 12: MOV TEMP[3].xy, IN[1].xyxx > 13: ABS TEMP[4].x, TEMP[2].xxxx > 14: MUL TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy > 15: MIN TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx > 16: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[0].xxxx > 17: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][11].xyzz > 18: DP3 TEMP[6].x, CONST[1][7].xyzz, TEMP[5].xyzz > 19: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[5].xyzz > 20: MOV TEMP[6].z, TEMP[7].xxxx > 21: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[5].xyzz > 22: MOV TEMP[6].y, TEMP[7].xxxx > 23: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz > 24: RSQ TEMP[8].x, TEMP[8].xxxx > 25: MUL TEMP[5].xyz, TEMP[8].xxxx, TEMP[6].xyzz > 26: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz > 27: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 28: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 29: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 30: DP3 TEMP[8].x, -TEMP[5].xyzz, CONST[2][20].xyzz > 31: FMA TEMP[9].x, -CONST[2][16].yyyy, TEMP[8].xxxx, CONST[2][16].xxxx > 32: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx, IMM[0].xxxx > 33: MUL TEMP[8].x, TEMP[8].xxxx, IMM[0].wwww > 34: ABS TEMP[9].x, TEMP[9].xxxx > 35: LG2 TEMP[9].x, TEMP[9].xxxx > 36: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].xxxx > 37: EX2 TEMP[9].x, TEMP[9].xxxx > 38: FMA TEMP[5].x, CONST[2][16].zzzz, TEMP[9].xxxx, -CONST[2][15].zzzz > 39: MUL TEMP[9].x, TEMP[9].xxxx, CONST[2][16].zzzz > 40: MAX TEMP[10].x, TEMP[5].xxxx, IMM[4].yyyy > 41: FMA TEMP[4].x, -TEMP[10].xxxx, TEMP[4].xxxx, TEMP[9].xxxx > 42: MAX TEMP[4].x, TEMP[4].xxxx, CONST[2][19].wwww > 43: MAX TEMP[9].x, TEMP[0].xxxx, IMM[4].yyyy > 44: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[2][18].zzzz > 45: FSNE TEMP[10].x, CONST[2][15].yyyy, IMM[4].yyyy > 46: UIF TEMP[10].xxxx :0 > 47: RCP TEMP[10].x, CONST[2][15].yyyy > 48: MUL TEMP[10].x, TEMP[0].xxxx, TEMP[10].xxxx > 49: ELSE :0 > 50: SSG TEMP[11].x, TEMP[0].xxxx > 51: MUL TEMP[10].x, IMM[4].zzzz, TEMP[11].xxxx > 52: ENDIF > 53: FSNE TEMP[11].x, CONST[2][15].xxxx, IMM[4].yyyy > 54: UIF TEMP[11].xxxx :0 > 55: RCP TEMP[11].x, CONST[2][15].xxxx > 56: MUL TEMP[11].x, -TEMP[9].xxxx, TEMP[11].xxxx > 57: ELSE :0 > 58: SSG TEMP[12].x, -TEMP[9].xxxx > 59: MUL TEMP[11].x, IMM[4].zzzz, TEMP[12].xxxx > 60: ENDIF > 61: FSNE TEMP[12].x, CONST[2][14].wwww, IMM[4].yyyy > 62: UIF TEMP[12].xxxx :0 > 63: RCP TEMP[12].x, CONST[2][14].wwww > 64: MUL TEMP[12].x, -TEMP[9].xxxx, TEMP[12].xxxx > 65: ELSE :0 > 66: SSG TEMP[9].x, -TEMP[9].xxxx > 67: MUL TEMP[12].x, IMM[4].zzzz, TEMP[9].xxxx > 68: ENDIF > 69: MUL TEMP[9].x, TEMP[12].xxxx, IMM[4].wwww > 70: EX2 TEMP[9].x, TEMP[9].xxxx > 71: MUL TEMP[5].x, TEMP[11].xxxx, IMM[4].wwww > 72: EX2 TEMP[11].x, TEMP[5].xxxx > 73: ADD TEMP[5].x, TEMP[11].xxxx, CONST[2][16].wwww > 74: MUL TEMP[5].x, TEMP[5].xxxx, CONST[2][17].yyyy > 75: MUL TEMP[5].x, TEMP[5].xxxx, IMM[6].xxxx > 76: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx > 77: MIN TEMP[7].x, TEMP[7].xxxx, CONST[2][15].wwww > 78: MAX TEMP[7].x, TEMP[7].xxxx, CONST[2][17].xxxx > 79: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[4].xxxx > 80: FMA TEMP[7].xyz, 
CONST[2][14].xyzz, TEMP[9].xxxx, TEMP[7].xxxx > 81: MUL TEMP[6].xyz, TEMP[9].xxxx, CONST[2][14].xyzz > 82: FMA TEMP[4].xyz, TEMP[6].xyzz, TEMP[8].xxxx, TEMP[4].xxxx > 83: FSEQ TEMP[8].xyz, TEMP[7].xyzz, IMM[4].yyyy > 84: SSG TEMP[9].xyz, TEMP[4].xyzz > 85: MUL TEMP[9].xyz, IMM[4].zzzz, TEMP[9].xyzz > 86: RCP TEMP[11].x, TEMP[7].xxxx > 87: RCP TEMP[11].y, TEMP[7].yyyy > 88: RCP TEMP[11].z, TEMP[7].zzzz > 89: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[11].xyzz > 90: UCMP TEMP[4].xyz, TEMP[8].xyzz, TEMP[9].xyzz, TEMP[4].xyzz > 91: MUL TEMP[6].xyz, TEMP[10].xxxx, -TEMP[7].xyzz > 92: ABS TEMP[2].xyz, TEMP[2].xxxx > 93: MUL TEMP[5].xyz, TEMP[2].xyzz, -TEMP[7].xyzz > 94: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[4].wwww > 95: EX2 TEMP[2].x, TEMP[5].xxxx > 96: EX2 TEMP[2].y, TEMP[5].yyyy > 97: EX2 TEMP[2].z, TEMP[5].zzzz > 98: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[4].wwww > 99: LG2 TEMP[7].x, CONST[2][19].xxxx >100: LG2 TEMP[7].y, CONST[2][19].yyyy >101: LG2 TEMP[7].z, CONST[2][19].zzzz >102: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[6].yyyy >103: EX2 TEMP[8].x, TEMP[7].xxxx >104: EX2 TEMP[8].y, TEMP[7].yyyy >105: EX2 TEMP[8].z, TEMP[7].zzzz >106: EX2 TEMP[7].x, TEMP[6].xxxx >107: EX2 TEMP[7].y, TEMP[6].yyyy >108: EX2 TEMP[7].z, TEMP[6].zzzz >109: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[8].xyzz >110: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[6].xyzz >111: ADD TEMP[4].xyz, -TEMP[2].xyzz, IMM[0].xxxx >112: MOV TEMP[2].w, TEMP[2].xxxx >113: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz, IMM[6].zzzz >114: MAX TEMP[4].xyz, TEMP[0].xyzz, IMM[4].yyyy >115: FMA TEMP[7].xyz, TEMP[4].xyzz, IMM[6].wwww, IMM[6].xxxx >116: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[7].xyzz >117: FMA TEMP[6].xyz, TEMP[4].xyzz, IMM[6].wwww, IMM[8].xxxx >118: FMA TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xyzz, IMM[8].yyyy >119: FSEQ TEMP[6].xyz, TEMP[4].xyzz, IMM[4].yyyy >120: SSG TEMP[7].xyz, TEMP[5].xyzz >121: MUL TEMP[7].xyz, IMM[4].zzzz, TEMP[7].xyzz >122: RCP TEMP[8].x, TEMP[4].xxxx >123: RCP TEMP[8].y, TEMP[4].yyyy >124: RCP TEMP[8].z, TEMP[4].zzzz >125: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[8].xyzz >126: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[4].xyzz >127: MUL TEMP[0], IN[2], CONST[1][10] >128: MUL TEMP[4].xyz, TEMP[0].xyzz, CONST[1][22].xyzz >129: MOV TEMP[4].w, TEMP[0].wwww >130: MOV OUT[3], TEMP[4] >131: MOV OUT[2], TEMP[2] >132: MOV OUT[1], TEMP[3] >133: MOV OUT[0], TEMP[1] >134: END >radeonsi: Compiling shader 299 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> 
%17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 112) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 116) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 120) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 128) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 132) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 136) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 140) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 144) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 148) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 152) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 160) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 164) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 168) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 172) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 176) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 180) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 184) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call float @llvm.SI.load.const(<16 x i8> %55, i32 224) > %57 = call float @llvm.SI.load.const(<16 x i8> %55, i32 228) > %58 = call float @llvm.SI.load.const(<16 x i8> %55, i32 232) > %59 = call float @llvm.SI.load.const(<16 x i8> %55, i32 236) > %60 = call float @llvm.SI.load.const(<16 x i8> %55, i32 240) > %61 = call float @llvm.SI.load.const(<16 x i8> %55, i32 244) > %62 = call float @llvm.SI.load.const(<16 x i8> %55, i32 248) > %63 = call float @llvm.SI.load.const(<16 x i8> %55, i32 252) > %64 = call float @llvm.SI.load.const(<16 x i8> %55, i32 256) > %65 = call float @llvm.SI.load.const(<16 x i8> %55, i32 260) > %66 = call float @llvm.SI.load.const(<16 x i8> %55, i32 264) > %67 = call float @llvm.SI.load.const(<16 x i8> %55, i32 268) > %68 = call float @llvm.SI.load.const(<16 x i8> %55, i32 272) > %69 = call float @llvm.SI.load.const(<16 x i8> %55, i32 276) > %70 = call float @llvm.SI.load.const(<16 x i8> %55, i32 296) > %71 = call float @llvm.SI.load.const(<16 x i8> %55, i32 300) > %72 = call float @llvm.SI.load.const(<16 x i8> %55, i32 304) > %73 = call float @llvm.SI.load.const(<16 x i8> %55, i32 308) > %74 = call float @llvm.SI.load.const(<16 x i8> %55, i32 312) > %75 = call float @llvm.SI.load.const(<16 x i8> %55, i32 316) > %76 = call float @llvm.SI.load.const(<16 x i8> %55, i32 320) > %77 = call float @llvm.SI.load.const(<16 x i8> %55, i32 324) > %78 = call float @llvm.SI.load.const(<16 x i8> %55, i32 328) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) 
> %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %71 > %128 = call float @llvm.fabs.f32(float %120) > %129 = fmul float %128, 0x3EF4F8B580000000 > %130 = call float @llvm.minnum.f32(float %129, float 1.000000e+00) > %131 = fsub float 1.000000e+00, %130 > %132 = fsub float %48, %82 > %133 = fsub float %49, %83 > %134 = fsub float %50, %84 > %135 = fmul float %34, %132 > %136 = fmul float %35, %133 > %137 = fadd float %136, %135 > %138 = fmul float %36, %134 > %139 = fadd float %137, %138 > %140 = fmul float %41, %132 > %141 = fmul float %42, %133 > %142 = fadd float %141, %140 > %143 = fmul float %43, %134 > %144 = fadd float %142, %143 > %145 = fmul float %37, %132 > %146 = fmul float %38, %133 > %147 = fadd float %146, %145 > %148 = fmul float %39, %134 > %149 = fadd float %147, %148 > %150 = fmul float %139, %139 > %151 = fmul float %149, %149 > %152 = fadd float %151, %150 > %153 = fmul float %144, %144 > %154 = fadd float %152, %153 > %155 = call float @llvm.AMDGPU.rsq.clamped.f32(float %154) > %156 = fmul float %155, %139 > %157 = fmul float %155, %149 > %158 = fmul float %155, %144 > %159 = fsub float -0.000000e+00, %149 > %160 = call float @llvm.fma.f32(float %159, float %155, float 0xBFC3333340000000) > %161 = fsub float 1.000000e+00, %160 > %162 = call float @llvm.AMDGPU.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) > %163 = fmul float %162, %162 > %164 = fmul float %156, %76 > %165 = fsub float -0.000000e+00, %164 > %166 = fmul float %157, %77 > %167 = fsub float %165, %166 > %168 = fmul float %158, %78 > %169 = fsub float %167, %168 > %170 = fsub float -0.000000e+00, %65 > %171 = call float @llvm.fma.f32(float %170, float %169, float %64) > %172 = call float @llvm.fma.f32(float %169, float %169, float 1.000000e+00) > %173 = fmul float %172, 
0x3FAE8EC8A0000000 > %174 = call float @llvm.fabs.f32(float %171) > %175 = call float @llvm.log2.f32(float %174) > %176 = fmul float %175, -1.500000e+00 > %177 = call float @llvm.exp2.f32(float %176) > %178 = fsub float -0.000000e+00, %62 > %179 = call float @llvm.fma.f32(float %66, float %177, float %178) > %180 = fmul float %177, %66 > %181 = call float @llvm.maxnum.f32(float %179, float 0.000000e+00) > %182 = fsub float -0.000000e+00, %181 > %183 = call float @llvm.fma.f32(float %182, float %131, float %180) > %184 = call float @llvm.maxnum.f32(float %183, float %75) > %185 = call float @llvm.maxnum.f32(float %127, float 0.000000e+00) > %186 = fsub float %70, %127 > %187 = fcmp une float %61, 0.000000e+00 > br i1 %187, label %IF, label %ELSE > >IF: ; preds = %main_body > %188 = fdiv float 1.000000e+00, %61 > %189 = fmul float %186, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > %190 = fcmp ogt float %186, 0.000000e+00 > %191 = select i1 %190, float 1.000000e+00, float %186 > %192 = fcmp oge float %191, 0.000000e+00 > %.op = fmul float %191, 0x4600000000000000 > %193 = select i1 %192, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %193, %ELSE ] > %194 = fcmp une float %60, 0.000000e+00 > br i1 %194, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %195 = fdiv float 1.000000e+00, %60 > %196 = fmul float %185, %195 > %197 = fsub float -0.000000e+00, %196 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %198 = fcmp ole float %185, -0.000000e+00 > %.op58 = fmul float %185, 0xC600000000000000 > %199 = select i1 %198, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp44.0 = phi float [ %197, %IF53 ], [ %199, %ELSE54 ] > %200 = fcmp une float %59, 0.000000e+00 > br i1 %200, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %201 = fdiv float 1.000000e+00, %59 > %202 = fmul float %185, %201 > %203 = fsub float -0.000000e+00, %202 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %204 = fcmp ole float %185, -0.000000e+00 > %.op59 = fmul float %185, 0xC600000000000000 > %205 = select i1 %204, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp48.0 = phi float [ %203, %IF56 ], [ %205, %ELSE57 ] > %206 = fmul float %temp48.0, 0x3FF7154760000000 > %207 = call float @llvm.exp2.f32(float %206) > %208 = fmul float %temp44.0, 0x3FF7154760000000 > %209 = call float @llvm.exp2.f32(float %208) > %210 = fadd float %209, %67 > %211 = fmul float %210, %69 > %212 = fmul float %211, 5.000000e-01 > %213 = fmul float %163, %212 > %214 = call float @llvm.minnum.f32(float %213, float %63) > %215 = call float @llvm.maxnum.f32(float %214, float %68) > %216 = fmul float %215, %184 > %217 = call float @llvm.fma.f32(float %56, float %207, float %215) > %218 = call float @llvm.fma.f32(float %57, float %207, float %215) > %219 = call float @llvm.fma.f32(float %58, float %207, float %215) > %220 = fmul float %207, %56 > %221 = fmul float %207, %57 > %222 = fmul float %207, %58 > %223 = call float @llvm.fma.f32(float %220, float %173, float %216) > %224 = call float @llvm.fma.f32(float %221, float %173, float %216) > %225 = call float @llvm.fma.f32(float %222, float %173, float %216) > %226 = fcmp oeq float %217, 0.000000e+00 > %227 = fcmp oeq float %218, 0.000000e+00 > %228 = fcmp oeq float %219, 0.000000e+00 > %229 = fcmp ogt float %223, 0.000000e+00 > %230 = select i1 %229, float 1.000000e+00, float %223 > %231 = fcmp 
oge float %230, 0.000000e+00 > %232 = fcmp ogt float %224, 0.000000e+00 > %233 = select i1 %232, float 1.000000e+00, float %224 > %234 = fcmp oge float %233, 0.000000e+00 > %235 = fcmp ogt float %225, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %225 > %237 = fcmp oge float %236, 0.000000e+00 > %.op60 = fmul float %230, 0x4600000000000000 > %238 = select i1 %231, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %233, 0x4600000000000000 > %239 = select i1 %234, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %236, 0x4600000000000000 > %240 = select i1 %237, float %.op62, float 0xC600000000000000 > %241 = fdiv float 1.000000e+00, %217 > %242 = fdiv float 1.000000e+00, %218 > %243 = fdiv float 1.000000e+00, %219 > %244 = fmul float %223, %241 > %245 = fmul float %224, %242 > %246 = fmul float %225, %243 > %247 = select i1 %226, float %238, float %244 > %248 = select i1 %227, float %239, float %245 > %249 = select i1 %228, float %240, float %246 > %250 = fmul float %217, %temp40.0 > %251 = fmul float %218, %temp40.0 > %252 = fmul float %219, %temp40.0 > %253 = call float @llvm.fabs.f32(float %120) > %254 = call float @llvm.fabs.f32(float %120) > %255 = call float @llvm.fabs.f32(float %120) > %256 = fmul float %217, %253 > %257 = fmul float %218, %254 > %258 = fmul float %219, %255 > %259 = fmul float %256, 0xBFF7154760000000 > %260 = fmul float %257, 0xBFF7154760000000 > %261 = fmul float %258, 0xBFF7154760000000 > %262 = call float @llvm.exp2.f32(float %259) > %263 = call float @llvm.exp2.f32(float %260) > %264 = call float @llvm.exp2.f32(float %261) > %265 = fmul float %250, 0xBFF7154760000000 > %266 = fmul float %251, 0xBFF7154760000000 > %267 = fmul float %252, 0xBFF7154760000000 > %268 = call float @llvm.log2.f32(float %72) > %269 = call float @llvm.log2.f32(float %73) > %270 = call float @llvm.log2.f32(float %74) > %271 = fmul float %268, 0x3FDD1745E0000000 > %272 = fmul float %269, 0x3FDD1745E0000000 > %273 = fmul float %270, 0x3FDD1745E0000000 > %274 = call float @llvm.exp2.f32(float %271) > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %265) > %278 = call float @llvm.exp2.f32(float %266) > %279 = call float @llvm.exp2.f32(float %267) > %280 = fmul float %277, %274 > %281 = fmul float %278, %275 > %282 = fmul float %279, %276 > %283 = fmul float %247, %280 > %284 = fmul float %248, %281 > %285 = fmul float %249, %282 > %286 = fsub float 1.000000e+00, %262 > %287 = fsub float 1.000000e+00, %263 > %288 = fsub float 1.000000e+00, %264 > %289 = call float @llvm.fma.f32(float %283, float %286, float 0xBF70624DE0000000) > %290 = call float @llvm.fma.f32(float %284, float %287, float 0xBF70624DE0000000) > %291 = call float @llvm.fma.f32(float %285, float %288, float 0xBF70624DE0000000) > %292 = call float @llvm.maxnum.f32(float %289, float 0.000000e+00) > %293 = call float @llvm.maxnum.f32(float %290, float 0.000000e+00) > %294 = call float @llvm.maxnum.f32(float %291, float 0.000000e+00) > %295 = call float @llvm.fma.f32(float %292, float 0x4018CCCCC0000000, float 5.000000e-01) > %296 = call float @llvm.fma.f32(float %293, float 0x4018CCCCC0000000, float 5.000000e-01) > %297 = call float @llvm.fma.f32(float %294, float 0x4018CCCCC0000000, float 5.000000e-01) > %298 = fmul float %292, %295 > %299 = fmul float %293, %296 > %300 = fmul float %294, %297 > %301 = call float @llvm.fma.f32(float %292, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %302 = call 
float @llvm.fma.f32(float %293, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %303 = call float @llvm.fma.f32(float %294, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %304 = call float @llvm.fma.f32(float %292, float %301, float 0x3FAEB851E0000000) > %305 = call float @llvm.fma.f32(float %293, float %302, float 0x3FAEB851E0000000) > %306 = call float @llvm.fma.f32(float %294, float %303, float 0x3FAEB851E0000000) > %307 = fcmp oeq float %304, 0.000000e+00 > %308 = fcmp oeq float %305, 0.000000e+00 > %309 = fcmp oeq float %306, 0.000000e+00 > %310 = fcmp ogt float %298, 0.000000e+00 > %311 = select i1 %310, float 1.000000e+00, float %298 > %312 = fcmp oge float %311, 0.000000e+00 > %313 = fcmp ogt float %299, 0.000000e+00 > %314 = select i1 %313, float 1.000000e+00, float %299 > %315 = fcmp oge float %314, 0.000000e+00 > %316 = fcmp ogt float %300, 0.000000e+00 > %317 = select i1 %316, float 1.000000e+00, float %300 > %318 = fcmp oge float %317, 0.000000e+00 > %.op63 = fmul float %311, 0x4600000000000000 > %319 = select i1 %312, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %314, 0x4600000000000000 > %320 = select i1 %315, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %317, 0x4600000000000000 > %321 = select i1 %318, float %.op65, float 0xC600000000000000 > %322 = fdiv float 1.000000e+00, %304 > %323 = fdiv float 1.000000e+00, %305 > %324 = fdiv float 1.000000e+00, %306 > %325 = fmul float %298, %322 > %326 = fmul float %299, %323 > %327 = fmul float %300, %324 > %328 = select i1 %307, float %319, float %325 > %329 = select i1 %308, float %320, float %326 > %330 = select i1 %309, float %321, float %327 > %331 = fmul float %93, %44 > %332 = fmul float %94, %45 > %333 = fmul float %95, %46 > %334 = fmul float %96, %47 > %335 = fmul float %331, %51 > %336 = fmul float %332, %52 > %337 = fmul float %333, %53 > %338 = bitcast i32 %11 to float > %339 = insertvalue <{ float, float, float }> undef, float %338, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %328, float %329, float %330, float %262) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %335, float %336, float %337, float %334) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %339 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER 
KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], BUFFER, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0] >DCL TEMP[0..2], LOCAL >IMM[0] INT32 {0, 0, 0, 0} >IMM[1] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D > 2: MUL TEMP[0], TEMP[0], IN[2] > 3: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww > 4: MOV TEMP[1].w, TEMP[1].xxxx > 5: FMA TEMP[0].xyz, TEMP[0].xyzz, IN[1].wwww, IN[1].xyzz > 6: MOV TEMP[2].x, IMM[0].xxxx > 7: MOV TEMP[2].w, IMM[1].xxxx > 8: TXF TEMP[2].x, TEMP[2], SAMP[0], BUFFER > 9: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xxxx > 10: MOV OUT[0], TEMP[1] > 11: END >radeonsi: Compiling shader 300 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to <2 x i128> addrspace(2)* > %24 = load <2 x i128>, <2 x i128> addrspace(2)* %23, align 32, !tbaa !0 > %25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 > %27 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %28 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %27, i64 0, i64 7 > %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 > %30 = extractelement <8 x i32> %26, i32 7 > %31 = extractelement <4 x i32> %29, i32 0 > %32 = and i32 %31, %30 > %33 = insertelement <4 x i32> %29, i32 %32, i32 0 > %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %44 = bitcast float %34 to i32 > %45 = bitcast float %35 to i32 > %46 = insertelement <2 x i32> undef, i32 %44, i32 0 > %47 = insertelement <2 x i32> %46, i32 %45, i32 1 > %48 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %47, <8 x i32> %26, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %49 = extractelement <4 x float> %48, i32 0 > %50 = 
extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = extractelement <4 x float> %48, i32 3 > %53 = fmul float %49, %40 > %54 = fmul float %50, %41 > %55 = fmul float %51, %42 > %56 = fmul float %52, %43 > %57 = fmul float %56, %39 > %58 = call float @llvm.fma.f32(float %53, float %39, float %36) > %59 = call float @llvm.fma.f32(float %54, float %39, float %37) > %60 = call float @llvm.fma.f32(float %55, float %39, float %38) > %61 = extractelement <2 x i128> %24, i32 1 > %62 = bitcast i128 %61 to <16 x i8> > %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 0) > %64 = extractelement <4 x float> %63, i32 0 > %65 = fmul float %58, %64 > %66 = fmul float %59, %64 > %67 = fmul float %60, %64 > %68 = bitcast float %5 to i32 > %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %68, 10 > %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %65, 11 > %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %66, 12 > %72 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71, float %67, 13 > %73 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %72, float %57, 14 > %74 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %73, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %74 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 1 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 2 >DCL SV[0], VERTEXID >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL BUFFER[8] >DCL CONST[1][0..11] >DCL CONST[2][0..20] >DCL TEMP[0..8], LOCAL >IMM[0] UINT32 {8, 3, 4, 32} >IMM[1] UINT32 {1, 2, 0, 64} >IMM[2] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[3] UINT32 {80, 96, 128, 288} >IMM[4] UINT32 {176, 5, 6, 7} >IMM[5] UINT32 {112, 144, 320, 256} >IMM[6] UINT32 {240, 304, 224, 272} >IMM[7] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[8] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} 
>IMM[9] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: UMAD TEMP[0].x, SV[0].xxxx, IMM[0].xxxx, IMM[0].yyyy > 1: UMUL TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz > 2: LOAD TEMP[0].x, BUFFER[8], TEMP[0].xxxx > 3: MOV TEMP[0].x, TEMP[0].xxxx > 4: UMUL TEMP[1].x, IMM[0].wwww, SV[0].xxxx > 5: LOAD TEMP[1].x, BUFFER[8], TEMP[1].xxxx > 6: MOV TEMP[0].y, TEMP[1].xxxx > 7: UMAD TEMP[1].x, SV[0].xxxx, IMM[0].xxxx, IMM[1].xxxx > 8: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 9: LOAD TEMP[1].x, BUFFER[8], TEMP[1].xxxx > 10: MOV TEMP[0].z, TEMP[1].xxxx > 11: UMAD TEMP[1].x, SV[0].xxxx, IMM[0].xxxx, IMM[1].yyyy > 12: UMUL TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz > 13: LOAD TEMP[1].x, BUFFER[8], TEMP[1].xxxx > 14: MOV TEMP[0].w, TEMP[1].xxxx > 15: MOV TEMP[1].x, TEMP[0].xxxw > 16: MOV TEMP[2].xyz, TEMP[0].yzwy > 17: MOV TEMP[2].w, IMM[2].xxxx > 18: DP4 TEMP[3].x, CONST[1][4], TEMP[2] > 19: DP4 TEMP[4].x, CONST[1][5], TEMP[2] > 20: MOV TEMP[3].y, TEMP[4].xxxx > 21: DP4 TEMP[4].x, CONST[1][6], TEMP[2] > 22: DP4 TEMP[5].x, CONST[1][8], TEMP[2] > 23: ADD TEMP[2].x, TEMP[5].xxxx, CONST[2][18].wwww > 24: MOV TEMP[3].z, TEMP[4].xxxx > 25: ADD TEMP[0].xyz, -TEMP[0].yzww, CONST[1][11].xyzz > 26: UMAD TEMP[5].x, SV[0].xxxx, IMM[0].xxxx, IMM[0].zzzz > 27: UMUL TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz > 28: LOAD TEMP[5].x, BUFFER[8], TEMP[5].xxxx > 29: MOV TEMP[5].x, TEMP[5].xxxx > 30: UMAD TEMP[6].x, SV[0].xxxx, IMM[0].xxxx, IMM[4].yyyy > 31: UMUL TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz > 32: LOAD TEMP[6].x, BUFFER[8], TEMP[6].xxxx > 33: MOV TEMP[5].y, TEMP[6].xxxx > 34: UMAD TEMP[6].x, SV[0].xxxx, IMM[0].xxxx, IMM[4].zzzz > 35: UMUL TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz > 36: LOAD TEMP[6].x, BUFFER[8], TEMP[6].xxxx > 37: MOV TEMP[5].z, TEMP[6].xxxx > 38: UMAD TEMP[6].x, SV[0].xxxx, IMM[0].xxxx, IMM[4].wwww > 39: UMUL TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz > 40: LOAD TEMP[6].x, BUFFER[8], TEMP[6].xxxx > 41: MOV TEMP[5].w, TEMP[6].xxxx > 42: MOV TEMP[1].yz, TEMP[5].yxyy > 43: MOV TEMP[5].xy, TEMP[5].zwzz > 44: DP3 TEMP[6].x, CONST[1][4].xyzz, TEMP[1].xyzz > 45: DP3 TEMP[7].x, CONST[1][5].xyzz, TEMP[1].xyzz > 46: MOV TEMP[6].y, TEMP[7].xxxx > 47: DP3 TEMP[7].x, CONST[1][6].xyzz, TEMP[1].xyzz > 48: MOV TEMP[6].z, TEMP[7].xxxx > 49: DP3 TEMP[1].x, CONST[1][7].xyzz, TEMP[0].xyzz > 50: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[0].xyzz > 51: MOV TEMP[1].z, TEMP[7].xxxx > 52: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[0].xyzz > 53: MOV TEMP[1].y, TEMP[0].xxxx > 54: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 55: RSQ TEMP[7].x, TEMP[7].xxxx > 56: FMA TEMP[0].x, -TEMP[0].xxxx, TEMP[7].xxxx, IMM[2].yyyy > 57: ADD TEMP[0].x, -TEMP[0].xxxx, IMM[2].xxxx > 58: MOV_SAT TEMP[0].x, TEMP[0].xxxx > 59: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[0].xxxx > 60: MAX TEMP[0].x, TEMP[2].xxxx, IMM[7].xxxx > 61: FSNE TEMP[7].x, CONST[2][15].yyyy, IMM[7].xxxx > 62: UIF TEMP[7].xxxx :0 > 63: ELSE :0 > 64: ENDIF > 65: FSNE TEMP[7].x, CONST[2][15].xxxx, IMM[7].xxxx > 66: UIF TEMP[7].xxxx :0 > 67: RCP TEMP[7].x, CONST[2][15].xxxx > 68: MUL TEMP[7].x, -TEMP[0].xxxx, TEMP[7].xxxx > 69: ELSE :0 > 70: SSG TEMP[8].x, -TEMP[0].xxxx > 71: MUL TEMP[7].x, IMM[7].zzzz, TEMP[8].xxxx > 72: ENDIF > 73: FSNE TEMP[8].x, CONST[2][14].wwww, IMM[7].xxxx > 74: UIF TEMP[8].xxxx :0 > 75: RCP TEMP[8].x, CONST[2][14].wwww > 76: MUL TEMP[8].x, -TEMP[0].xxxx, TEMP[8].xxxx > 77: ELSE :0 > 78: SSG TEMP[0].x, -TEMP[0].xxxx > 79: MUL TEMP[8].x, IMM[7].zzzz, TEMP[0].xxxx > 80: ENDIF > 81: MUL TEMP[0].x, TEMP[8].xxxx, IMM[7].wwww > 82: EX2 TEMP[0].x, TEMP[0].xxxx > 83: MUL TEMP[7].x, TEMP[7].xxxx, IMM[7].wwww > 84: 
EX2 TEMP[7].x, TEMP[7].xxxx > 85: ADD TEMP[7].x, TEMP[7].xxxx, CONST[2][16].wwww > 86: MUL TEMP[7].x, TEMP[7].xxxx, CONST[2][17].yyyy > 87: MUL TEMP[7].x, TEMP[7].xxxx, IMM[8].xxxx > 88: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx > 89: MIN TEMP[1].x, TEMP[1].xxxx, CONST[2][15].wwww > 90: MAX TEMP[1].x, TEMP[1].xxxx, CONST[2][17].xxxx > 91: FMA TEMP[0].xyz, CONST[2][14].xyzz, TEMP[0].xxxx, TEMP[1].xxxx > 92: ABS TEMP[1].xyz, TEMP[4].xxxx > 93: MUL TEMP[2].xyz, TEMP[1].xyzz, -TEMP[0].xyzz > 94: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[7].wwww > 95: EX2 TEMP[0].x, TEMP[2].xxxx > 96: EX2 TEMP[0].y, TEMP[2].yyyy > 97: EX2 TEMP[0].z, TEMP[2].zzzz > 98: MOV TEMP[0].xyz, TEMP[0].xyzx > 99: MOV OUT[3], TEMP[0] >100: MOV OUT[1], TEMP[6] >101: MOV OUT[2], TEMP[5] >102: MOV OUT[0], TEMP[3] >103: END >radeonsi: Compiling shader 301 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 > %15 = call float @llvm.SI.load.const(<16 x i8> %14, i32 64) > %16 = call float @llvm.SI.load.const(<16 x i8> %14, i32 68) > %17 = call float @llvm.SI.load.const(<16 x i8> %14, i32 72) > %18 = call float @llvm.SI.load.const(<16 x i8> %14, i32 76) > %19 = call float @llvm.SI.load.const(<16 x i8> %14, i32 80) > %20 = call float @llvm.SI.load.const(<16 x i8> %14, i32 84) > %21 = call float @llvm.SI.load.const(<16 x i8> %14, i32 88) > %22 = call float @llvm.SI.load.const(<16 x i8> %14, i32 92) > %23 = call float @llvm.SI.load.const(<16 x i8> %14, i32 96) > %24 = call float @llvm.SI.load.const(<16 x i8> %14, i32 100) > %25 = call float @llvm.SI.load.const(<16 x i8> %14, i32 104) > %26 = call float @llvm.SI.load.const(<16 x i8> %14, i32 108) > %27 = call float @llvm.SI.load.const(<16 x i8> %14, i32 112) > %28 = call float @llvm.SI.load.const(<16 x i8> %14, i32 116) > %29 = call float @llvm.SI.load.const(<16 x i8> %14, i32 120) > %30 = call float @llvm.SI.load.const(<16 x i8> %14, i32 128) > %31 = call float @llvm.SI.load.const(<16 x i8> %14, i32 132) > %32 = call float @llvm.SI.load.const(<16 x i8> %14, i32 136) > %33 = call float @llvm.SI.load.const(<16 x i8> %14, i32 140) > %34 = call float @llvm.SI.load.const(<16 x i8> %14, i32 144) > %35 = call float @llvm.SI.load.const(<16 x i8> %14, i32 148) > %36 = call float @llvm.SI.load.const(<16 x i8> %14, i32 152) > %37 = call float @llvm.SI.load.const(<16 x i8> %14, i32 176) > %38 = call float @llvm.SI.load.const(<16 x i8> %14, i32 180) > %39 = call float @llvm.SI.load.const(<16 x i8> %14, i32 184) > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call float @llvm.SI.load.const(<16 x i8> %41, i32 224) > %43 = call float @llvm.SI.load.const(<16 x i8> %41, i32 228) > %44 = call float @llvm.SI.load.const(<16 x i8> %41, i32 232) > %45 = call float @llvm.SI.load.const(<16 x i8> %41, i32 236) > %46 = call float @llvm.SI.load.const(<16 x i8> %41, i32 240) > %47 = call float @llvm.SI.load.const(<16 x i8> %41, i32 252) > %48 = call float 
@llvm.SI.load.const(<16 x i8> %41, i32 268) > %49 = call float @llvm.SI.load.const(<16 x i8> %41, i32 272) > %50 = call float @llvm.SI.load.const(<16 x i8> %41, i32 276) > %51 = call float @llvm.SI.load.const(<16 x i8> %41, i32 300) > %52 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 8 > %53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %0, i64 0, i64 1 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = add i32 %9, %6 > %57 = shl i32 %56, 5 > %58 = or i32 %57, 12 > %59 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %58, i1 false, i1 false) > %60 = shl i32 %56, 5 > %61 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %60, i1 false, i1 false) > %62 = shl i32 %56, 5 > %63 = or i32 %62, 4 > %64 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %63, i1 false, i1 false) > %65 = shl i32 %56, 5 > %66 = or i32 %65, 8 > %67 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %66, i1 false, i1 false) > %68 = fmul float %15, %61 > %69 = fmul float %16, %64 > %70 = fadd float %68, %69 > %71 = fmul float %17, %67 > %72 = fadd float %70, %71 > %73 = fadd float %72, %18 > %74 = fmul float %19, %61 > %75 = fmul float %20, %64 > %76 = fadd float %74, %75 > %77 = fmul float %21, %67 > %78 = fadd float %76, %77 > %79 = fadd float %78, %22 > %80 = fmul float %23, %61 > %81 = fmul float %24, %64 > %82 = fadd float %80, %81 > %83 = fmul float %25, %67 > %84 = fadd float %82, %83 > %85 = fadd float %84, %26 > %86 = fmul float %30, %61 > %87 = fmul float %31, %64 > %88 = fadd float %86, %87 > %89 = fmul float %32, %67 > %90 = fadd float %88, %89 > %91 = fadd float %90, %33 > %92 = fadd float %91, %51 > %93 = fsub float %37, %61 > %94 = fsub float %38, %64 > %95 = fsub float %39, %67 > %96 = shl i32 %56, 5 > %97 = or i32 %96, 16 > %98 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %97, i1 false, i1 false) > %99 = shl i32 %56, 5 > %100 = or i32 %99, 20 > %101 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %100, i1 false, i1 false) > %102 = shl i32 %56, 5 > %103 = or i32 %102, 24 > %104 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %103, i1 false, i1 false) > %105 = shl i32 %56, 5 > %106 = or i32 %105, 28 > %107 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %53, i32 0, i32 %106, i1 false, i1 false) > %108 = fmul float %15, %59 > %109 = fmul float %16, %98 > %110 = fadd float %109, %108 > %111 = fmul float %17, %101 > %112 = fadd float %110, %111 > %113 = fmul float %19, %59 > %114 = fmul float %20, %98 > %115 = fadd float %114, %113 > %116 = fmul float %21, %101 > %117 = fadd float %115, %116 > %118 = fmul float %23, %59 > %119 = fmul float %24, %98 > %120 = fadd float %119, %118 > %121 = fmul float %25, %101 > %122 = fadd float %120, %121 > %123 = fmul float %27, %93 > %124 = fmul float %28, %94 > %125 = fadd float %124, %123 > %126 = fmul float %29, %95 > %127 = fadd float %125, %126 > %128 = fmul float %34, %93 > %129 = fmul float %35, %94 > %130 = fadd float %129, %128 > %131 = fmul float %36, %95 > %132 = fadd float %130, %131 > %133 = fmul float %30, %93 > %134 = fmul float %31, %94 > %135 = fadd float %134, %133 > %136 = fmul float %32, %95 > %137 = fadd float %135, %136 > %138 = fmul float %127, %127 > %139 = fmul float %137, %137 > %140 = fadd float %139, %138 > %141 = fmul float %132, %132 > %142 = fadd float %140, 
%141 > %143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142) > %144 = fsub float -0.000000e+00, %137 > %145 = call float @llvm.fma.f32(float %144, float %143, float 0xBFC3333340000000) > %146 = fsub float 1.000000e+00, %145 > %147 = call float @llvm.AMDGPU.clamp.(float %146, float 0.000000e+00, float 1.000000e+00) > %148 = fmul float %147, %147 > %149 = call float @llvm.maxnum.f32(float %92, float 0.000000e+00) > %150 = fcmp une float %46, 0.000000e+00 > br i1 %150, label %IF37, label %ELSE38 > >IF37: ; preds = %main_body > %151 = fdiv float 1.000000e+00, %46 > %152 = fmul float %149, %151 > %153 = fsub float -0.000000e+00, %152 > br label %ENDIF36 > >ELSE38: ; preds = %main_body > %154 = fcmp ole float %149, -0.000000e+00 > %.op = fmul float %149, 0xC600000000000000 > %155 = select i1 %154, float %.op, float 0xC600000000000000 > br label %ENDIF36 > >ENDIF36: ; preds = %ELSE38, %IF37 > %temp28.0 = phi float [ %153, %IF37 ], [ %155, %ELSE38 ] > %156 = fcmp une float %45, 0.000000e+00 > br i1 %156, label %IF40, label %ELSE41 > >IF40: ; preds = %ENDIF36 > %157 = fdiv float 1.000000e+00, %45 > %158 = fmul float %149, %157 > %159 = fsub float -0.000000e+00, %158 > br label %ENDIF39 > >ELSE41: ; preds = %ENDIF36 > %160 = fcmp ole float %149, -0.000000e+00 > %.op42 = fmul float %149, 0xC600000000000000 > %161 = select i1 %160, float %.op42, float 0xC600000000000000 > br label %ENDIF39 > >ENDIF39: ; preds = %ELSE41, %IF40 > %temp32.0 = phi float [ %159, %IF40 ], [ %161, %ELSE41 ] > %162 = fmul float %temp32.0, 0x3FF7154760000000 > %163 = call float @llvm.exp2.f32(float %162) > %164 = fmul float %temp28.0, 0x3FF7154760000000 > %165 = call float @llvm.exp2.f32(float %164) > %166 = fadd float %165, %48 > %167 = fmul float %166, %50 > %168 = fmul float %167, 5.000000e-01 > %169 = fmul float %148, %168 > %170 = call float @llvm.minnum.f32(float %169, float %47) > %171 = call float @llvm.maxnum.f32(float %170, float %49) > %172 = call float @llvm.fma.f32(float %42, float %163, float %171) > %173 = call float @llvm.fma.f32(float %43, float %163, float %171) > %174 = call float @llvm.fma.f32(float %44, float %163, float %171) > %175 = call float @llvm.fabs.f32(float %85) > %176 = call float @llvm.fabs.f32(float %85) > %177 = call float @llvm.fabs.f32(float %85) > %178 = fmul float %172, %175 > %179 = fmul float %173, %176 > %180 = fmul float %174, %177 > %181 = fmul float %178, 0xBFF7154760000000 > %182 = fmul float %179, 0xBFF7154760000000 > %183 = fmul float %180, 0xBFF7154760000000 > %184 = call float @llvm.exp2.f32(float %181) > %185 = call float @llvm.exp2.f32(float %182) > %186 = call float @llvm.exp2.f32(float %183) > %187 = bitcast float %73 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %187, i32 1, i32 undef, i32 %8, i32 64, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %188 = bitcast float %79 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %188, i32 1, i32 undef, i32 %8, i32 68, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %189 = bitcast float %85 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %189, i32 1, i32 undef, i32 %8, i32 72, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 undef, i32 1, i32 undef, i32 %8, i32 76, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %190 = bitcast float %112 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %190, i32 1, i32 undef, i32 %8, i32 80, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %191 = bitcast 
float %117 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %191, i32 1, i32 undef, i32 %8, i32 84, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %192 = bitcast float %122 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %192, i32 1, i32 undef, i32 %8, i32 88, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 undef, i32 1, i32 undef, i32 %8, i32 92, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %193 = bitcast float %104 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %193, i32 1, i32 undef, i32 %8, i32 96, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %194 = bitcast float %107 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %194, i32 1, i32 undef, i32 %8, i32 100, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %195 = bitcast float %104 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %195, i32 1, i32 undef, i32 %8, i32 104, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %196 = bitcast float %107 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %196, i32 1, i32 undef, i32 %8, i32 108, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %197 = bitcast float %184 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %197, i32 1, i32 undef, i32 %8, i32 112, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %198 = bitcast float %185 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %198, i32 1, i32 undef, i32 %8, i32 116, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %199 = bitcast float %186 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %199, i32 1, i32 undef, i32 %8, i32 120, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > %200 = bitcast float %67 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %55, i32 %200, i32 1, i32 undef, i32 %8, i32 124, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readonly >declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) > >attributes #0 = { nounwind readnone } >attributes #1 = { nounwind readonly } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY >GEOM >PROPERTY GS_INPUT_PRIMITIVE POINTS >PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP >PROPERTY GS_MAX_OUTPUT_VERTICES 4 >PROPERTY GS_INVOCATIONS 1 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[0..15] >DCL CONST[1][0..24] >DCL CONST[2][0..6] >DCL TEMP[0..34], LOCAL >DCL ADDR[0] >IMM[0] UINT32 {0, 384, 368, 160} >IMM[1] FLT32 { 0.0000, 
158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {4, 1, 64, 80} >IMM[4] UINT32 {96, 16, 32, 48} > 0: MOV TEMP[0], CONST[1][24] > 1: ADD TEMP[0].x, -CONST[1][24].xxxx, IN[0][1].yyyy > 2: MOV TEMP[0].y, TEMP[0].xxxx > 3: MOV TEMP[0].x, IN[0][1].xxxx > 4: DP2 TEMP[1].x, TEMP[0].xyyy, TEMP[0].xyyy > 5: RSQ TEMP[1].x, TEMP[1].xxxx > 6: MUL TEMP[0].xy, TEMP[1].xxxx, TEMP[0].xyyy > 7: MOV TEMP[1], CONST[1][24] > 8: MUL TEMP[1].xy, TEMP[0].xyyy, CONST[1][24].yyyy > 9: DP2 TEMP[2].x, IN[0][1].xyyy, IN[0][1].xyyy > 10: SQRT TEMP[2].x, TEMP[2].xxxx > 11: MOV TEMP[3], CONST[1][24] > 12: ADD TEMP[2].x, TEMP[2].xxxx, -CONST[1][24].xxxx > 13: MOV TEMP[3], CONST[1][23] > 14: FSNE TEMP[3].x, CONST[1][23].wwww, IMM[1].xxxx > 15: UIF TEMP[3].xxxx :0 > 16: MOV TEMP[3], CONST[1][23] > 17: RCP TEMP[3].x, CONST[1][23].wwww > 18: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 19: ELSE :0 > 20: SSG TEMP[2].x, TEMP[2].xxxx > 21: MUL TEMP[3].x, IMM[1].yyyy, TEMP[2].xxxx > 22: ENDIF > 23: MOV_SAT TEMP[2].x, TEMP[3].xxxx > 24: MUL TEMP[1].xy, TEMP[2].xxxx, TEMP[1].xyyy > 25: MOV TEMP[2], CONST[1][24] > 26: FSNE TEMP[2].x, CONST[1][24].wwww, IMM[1].xxxx > 27: UIF TEMP[2].xxxx :0 > 28: MOV TEMP[2], CONST[1][24] > 29: RCP TEMP[2].x, CONST[1][24].wwww > 30: MUL TEMP[2].x, IN[0][2].xxxx, TEMP[2].xxxx > 31: ELSE :0 > 32: SSG TEMP[3].x, IN[0][2].xxxx > 33: MUL TEMP[2].x, IMM[1].yyyy, TEMP[3].xxxx > 34: ENDIF > 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 36: MOV TEMP[3], CONST[1][24] > 37: FSLT TEMP[3].x, IMM[1].xxxx, CONST[1][24].zzzz > 38: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx > 39: INEG TEMP[3].x, TEMP[3].xxxx > 40: ADD TEMP[4].x, -IN[0][2].xxxx, IN[0][2].yyyy > 41: MOV TEMP[5], CONST[1][24] > 42: FSNE TEMP[5].x, CONST[1][24].zzzz, IMM[1].xxxx > 43: UIF TEMP[5].xxxx :0 > 44: MOV TEMP[5], CONST[1][24] > 45: RCP TEMP[5].x, CONST[1][24].zzzz > 46: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 47: ELSE :0 > 48: SSG TEMP[4].x, TEMP[4].xxxx > 49: MUL TEMP[5].x, IMM[1].yyyy, TEMP[4].xxxx > 50: ENDIF > 51: MOV_SAT TEMP[4].x, TEMP[5].xxxx > 52: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[4].xxxx > 53: MOV TEMP[1].w, TEMP[4].xxxx > 54: MOV TEMP[5].x, TEMP[3].xxxx > 55: MOV TEMP[5].x, TEMP[4].xxxx > 56: MOV TEMP[5].x, TEMP[2].xxxx > 57: USNE TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx > 58: UIF TEMP[3].xxxx :0 > 59: MOV TEMP[3].x, TEMP[4].xxxx > 60: ELSE :0 > 61: MOV TEMP[3].x, TEMP[2].xxxx > 62: ENDIF > 63: MOV TEMP[2], CONST[1][10] > 64: MUL TEMP[2].x, TEMP[3].xxxx, CONST[1][10].wwww > 65: MOV TEMP[0].w, TEMP[2].xxxx > 66: MOV TEMP[2], CONST[1][10] > 67: MUL TEMP[2].xyz, CONST[1][10].xyzz, IN[0][3].xyzz > 68: MOV TEMP[0].z, -TEMP[0].yyyy > 69: MOV TEMP[3].z, IMM[1].xxxx > 70: MOV TEMP[4].w, IMM[1].zzzz > 71: MOV TEMP[5].w, IMM[1].zzzz > 72: MOV TEMP[1].z, IMM[1].xxxx > 73: BGNLOOP :0 > 74: USGE TEMP[6].x, TEMP[1].zzzz, IMM[3].xxxx > 75: AND TEMP[7].x, TEMP[6].xxxx, IMM[2].xxxx > 76: INEG TEMP[8].x, TEMP[7].xxxx > 77: MOV TEMP[1].w, TEMP[8].xxxx > 78: MOV TEMP[9].x, TEMP[8].xxxx > 79: USNE TEMP[10].x, TEMP[8].xxxx, IMM[0].xxxx > 80: UIF TEMP[10].xxxx :0 > 81: BRK > 82: ENDIF > 83: MOV TEMP[11].x, TEMP[1].zzzz > 84: UARL ADDR[0].x, TEMP[1].zzzz > 85: UARL ADDR[0].x, TEMP[1].zzzz > 86: DP2 TEMP[12].x, TEMP[0].zxxx, CONST[ADDR[0].x+12].xyyy > 87: MOV TEMP[13].x, TEMP[1].zzzz > 88: UARL ADDR[0].x, TEMP[1].zzzz > 89: DP2 TEMP[14].x, TEMP[0].xyyy, CONST[ADDR[0].x+8].xyyy > 90: MOV TEMP[12].y, TEMP[14].xxxx > 91: MOV TEMP[15].x, TEMP[1].zzzz > 92: UARL ADDR[0].x, TEMP[1].zzzz > 93: MUL TEMP[16].xy, TEMP[1].xyyy, 
CONST[ADDR[0].x+4].wwww > 94: MOV TEMP[12].zw, TEMP[16].yyxy > 95: MOV TEMP[17], CONST[1][23] > 96: FMA TEMP[3].xy, CONST[1][23].xyyy, TEMP[12].xyyy, -TEMP[16].xyyy > 97: ADD TEMP[4].xyz, TEMP[3].xyzz, IN[0][0].xyzz > 98: MOV TEMP[18], CONST[2][4] > 99: DP4 TEMP[5].x, CONST[2][4], TEMP[4] >100: MOV TEMP[19], CONST[2][5] >101: DP4 TEMP[20].x, CONST[2][5], TEMP[4] >102: MOV TEMP[5].y, TEMP[20].xxxx >103: MOV TEMP[21], CONST[2][6] >104: DP4 TEMP[22].x, CONST[2][6], TEMP[4] >105: MOV TEMP[5].z, TEMP[22].xxxx >106: MOV TEMP[23], CONST[2][0] >107: DP4 TEMP[24].x, CONST[2][0], TEMP[5] >108: MOV TEMP[1].w, TEMP[24].xxxx >109: MOV TEMP[25], CONST[2][1] >110: DP4 TEMP[26].x, CONST[2][1], TEMP[5] >111: MOV TEMP[2].w, TEMP[26].xxxx >112: MOV TEMP[27], CONST[2][2] >113: DP4 TEMP[3].x, CONST[2][2], TEMP[5] >114: MOV TEMP[28], CONST[2][3] >115: DP4 TEMP[29].x, CONST[2][3], TEMP[5] >116: MOV TEMP[3].y, TEMP[29].xxxx >117: MOV TEMP[30].x, TEMP[24].xxxx >118: MOV TEMP[30].y, TEMP[26].xxxx >119: MOV TEMP[30].z, TEMP[3].xxxx >120: MOV TEMP[30].w, TEMP[29].xxxx >121: MOV TEMP[31].x, TEMP[1].zzzz >122: UARL ADDR[0].x, TEMP[1].zzzz >123: MOV TEMP[32].xy, CONST[ADDR[0].x].zwzz >124: MOV TEMP[33].xyz, TEMP[2].xyzx >125: MOV TEMP[33].w, TEMP[0].wwww >126: MOV OUT[2], TEMP[33] >127: MOV OUT[1], TEMP[32] >128: MOV OUT[0], TEMP[30] >129: EMIT IMM[2].yyyy >130: UADD TEMP[34].x, TEMP[1].zzzz, IMM[2].xxxx >131: MOV TEMP[1].z, TEMP[34].xxxx >132: ENDLOOP :0 >133: ENDPRIM IMM[2].yyyy >134: END >radeonsi: Compiling shader 302 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_gs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 160) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 164) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 168) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 172) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 368) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 380) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 384) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 388) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 392) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 396) > %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 > %32 = call float @llvm.SI.load.const(<16 x i8> %31, i32 0) > %33 = call float @llvm.SI.load.const(<16 x i8> %31, i32 4) > %34 = call float @llvm.SI.load.const(<16 x i8> %31, i32 8) > %35 = call float @llvm.SI.load.const(<16 x i8> %31, i32 12) > %36 = call float @llvm.SI.load.const(<16 x i8> %31, i32 16) > %37 = call float @llvm.SI.load.const(<16 x i8> %31, i32 20) > %38 = call float @llvm.SI.load.const(<16 x i8> %31, i32 24) > %39 = call float 
@llvm.SI.load.const(<16 x i8> %31, i32 28) > %40 = call float @llvm.SI.load.const(<16 x i8> %31, i32 32) > %41 = call float @llvm.SI.load.const(<16 x i8> %31, i32 36) > %42 = call float @llvm.SI.load.const(<16 x i8> %31, i32 40) > %43 = call float @llvm.SI.load.const(<16 x i8> %31, i32 44) > %44 = call float @llvm.SI.load.const(<16 x i8> %31, i32 48) > %45 = call float @llvm.SI.load.const(<16 x i8> %31, i32 52) > %46 = call float @llvm.SI.load.const(<16 x i8> %31, i32 56) > %47 = call float @llvm.SI.load.const(<16 x i8> %31, i32 60) > %48 = call float @llvm.SI.load.const(<16 x i8> %31, i32 64) > %49 = call float @llvm.SI.load.const(<16 x i8> %31, i32 68) > %50 = call float @llvm.SI.load.const(<16 x i8> %31, i32 72) > %51 = call float @llvm.SI.load.const(<16 x i8> %31, i32 76) > %52 = call float @llvm.SI.load.const(<16 x i8> %31, i32 80) > %53 = call float @llvm.SI.load.const(<16 x i8> %31, i32 84) > %54 = call float @llvm.SI.load.const(<16 x i8> %31, i32 88) > %55 = call float @llvm.SI.load.const(<16 x i8> %31, i32 92) > %56 = call float @llvm.SI.load.const(<16 x i8> %31, i32 96) > %57 = call float @llvm.SI.load.const(<16 x i8> %31, i32 100) > %58 = call float @llvm.SI.load.const(<16 x i8> %31, i32 104) > %59 = call float @llvm.SI.load.const(<16 x i8> %31, i32 108) > %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %0, i64 0, i64 2 > %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %0, i64 0, i64 3 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = shl i32 %7, 2 > %65 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %64, i32 5376, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %66 = bitcast i32 %65 to float > %67 = fsub float %66, %26 > %68 = shl i32 %7, 2 > %69 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %68, i32 5120, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %70 = bitcast i32 %69 to float > %71 = fmul float %70, %70 > %72 = fmul float %67, %67 > %73 = fadd float %71, %72 > %74 = call float @llvm.AMDGPU.rsq.clamped.f32(float %73) > %75 = fmul float %74, %70 > %76 = fmul float %74, %67 > %77 = fmul float %75, %27 > %78 = fmul float %76, %27 > %79 = shl i32 %7, 2 > %80 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %79, i32 5120, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %81 = bitcast i32 %80 to float > %82 = shl i32 %7, 2 > %83 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %82, i32 5376, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %84 = bitcast i32 %83 to float > %85 = shl i32 %7, 2 > %86 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %85, i32 5120, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %87 = bitcast i32 %86 to float > %88 = shl i32 %7, 2 > %89 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %88, i32 5376, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %90 = bitcast i32 %89 to float > %91 = fmul float %81, %87 > %92 = fmul float %84, %90 > %93 = fadd float %91, %92 > %94 = call float @llvm.sqrt.f32(float %93) > %95 = fsub float %94, %26 > %96 = fcmp une float %25, 0.000000e+00 > br i1 %96, label %IF, label %ELSE > >IF: ; preds = %main_body > %97 = fdiv float 1.000000e+00, %25 > %98 = fmul float %95, %97 > br label %ENDIF > >ELSE: ; preds = %main_body > %99 = fcmp ogt float %95, 0.000000e+00 > %100 = select i1 %99, float 1.000000e+00, float %95 > %101 = fcmp oge float %100, 0.000000e+00 > %.op = fmul float %100, 0x4600000000000000 > %102 = 
select i1 %101, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %98, %IF ], [ %102, %ELSE ] > %103 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %104 = fmul float %103, %77 > %105 = fmul float %103, %78 > %106 = fcmp une float %29, 0.000000e+00 > br i1 %106, label %IF141, label %ELSE142 > >IF141: ; preds = %ENDIF > %107 = fdiv float 1.000000e+00, %29 > %108 = shl i32 %7, 2 > %109 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %108, i32 6144, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %110 = bitcast i32 %109 to float > %111 = fmul float %110, %107 > br label %ENDIF140 > >ELSE142: ; preds = %ENDIF > %112 = shl i32 %7, 2 > %113 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %112, i32 6144, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %114 = bitcast i32 %113 to float > %115 = fcmp ogt float %114, 0.000000e+00 > %116 = select i1 %115, float 1.000000e+00, float %114 > %117 = fcmp oge float %116, 0.000000e+00 > %.op159 = fmul float %116, 0x4600000000000000 > %118 = select i1 %117, float %.op159, float 0xC600000000000000 > br label %ENDIF140 > >ENDIF140: ; preds = %ELSE142, %IF141 > %temp8.0 = phi float [ %111, %IF141 ], [ %118, %ELSE142 ] > %119 = call float @llvm.AMDGPU.clamp.(float %temp8.0, float 0.000000e+00, float 1.000000e+00) > %120 = fcmp ogt float %28, 0.000000e+00 > %121 = shl i32 %7, 2 > %122 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %121, i32 6144, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %123 = bitcast i32 %122 to float > %124 = shl i32 %7, 2 > %125 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %124, i32 6400, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %126 = bitcast i32 %125 to float > %127 = fsub float %126, %123 > %128 = fcmp une float %28, 0.000000e+00 > br i1 %128, label %IF144, label %ELSE145 > >IF144: ; preds = %ENDIF140 > %129 = fdiv float 1.000000e+00, %28 > %130 = fmul float %127, %129 > br label %ENDIF143 > >ELSE145: ; preds = %ENDIF140 > %131 = fcmp ogt float %127, 0.000000e+00 > %132 = select i1 %131, float 1.000000e+00, float %127 > %133 = fcmp oge float %132, 0.000000e+00 > %.op160 = fmul float %132, 0x4600000000000000 > %134 = select i1 %133, float %.op160, float 0xC600000000000000 > br label %ENDIF143 > >ENDIF143: ; preds = %ELSE145, %IF144 > %temp20.0 = phi float [ %130, %IF144 ], [ %134, %ELSE145 ] > %135 = call float @llvm.AMDGPU.clamp.(float %temp20.0, float 0.000000e+00, float 1.000000e+00) > %136 = fmul float %119, %135 > %. 
= select i1 %120, float %136, float %119 > %137 = fmul float %., %22 > %138 = shl i32 %7, 2 > %139 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %138, i32 7168, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %140 = bitcast i32 %139 to float > %141 = fmul float %19, %140 > %142 = shl i32 %7, 2 > %143 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %142, i32 7424, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %144 = bitcast i32 %143 to float > %145 = fmul float %20, %144 > %146 = shl i32 %7, 2 > %147 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %146, i32 7680, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %148 = bitcast i32 %147 to float > %149 = fmul float %21, %148 > %150 = shl i32 %7, 2 > %151 = shl i32 %7, 2 > %152 = shl i32 %7, 2 > %153 = bitcast float %141 to i32 > %154 = bitcast float %145 to i32 > %155 = bitcast float %149 to i32 > %156 = bitcast float %137 to i32 > br label %LOOP > >LOOP: ; preds = %ENDIF149, %ENDIF143 > %157 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %158 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %159 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %160 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %161 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %162 = phi i32 [ 0, %ENDIF143 ], [ %283, %ENDIF149 ] > %.0 = phi i32 [ 0, %ENDIF143 ], [ %282, %ENDIF149 ] > %163 = icmp ugt i32 %157, 3 > br i1 %163, label %IF150, label %ENDIF149 > >IF150: ; preds = %LOOP > call void @llvm.SI.sendmsg(i32 18, i32 %6) > call void @llvm.SI.sendmsg(i32 3, i32 %6) > ret void undef > >ENDIF149: ; preds = %LOOP > %164 = shl i32 %158, 4 > %165 = add i32 %164, 192 > %166 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %165) > %167 = shl i32 %158, 4 > %168 = add i32 %167, 196 > %169 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %168) > %170 = fmul float %76, %166 > %171 = fmul float %75, %169 > %172 = fsub float %171, %170 > %173 = shl i32 %159, 4 > %174 = add i32 %173, 128 > %175 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %174) > %176 = shl i32 %159, 4 > %177 = add i32 %176, 132 > %178 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %177) > %179 = fmul float %75, %175 > %180 = fmul float %76, %178 > %181 = fadd float %179, %180 > %182 = shl i32 %160, 4 > %183 = add i32 %182, 76 > %184 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %183) > %185 = fmul float %104, %184 > %186 = shl i32 %160, 4 > %187 = add i32 %186, 76 > %188 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %187) > %189 = fmul float %105, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %23, float %172, float %190) > %192 = fsub float -0.000000e+00, %189 > %193 = call float @llvm.fma.f32(float %24, float %181, float %192) > %194 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %150, i32 4096, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %195 = bitcast i32 %194 to float > %196 = fadd float %191, %195 > %197 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %151, i32 4352, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %198 = bitcast i32 %197 to float > %199 = fadd float %193, %198 > %200 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %61, i32 %152, i32 4608, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) > %201 = bitcast i32 %200 to float > %202 = fadd float %201, 0.000000e+00 > %203 = fmul float %48, %196 > %204 = fmul float %49, %199 > %205 = fadd float %203, %204 > %206 = fmul float %50, %202 > %207 = fadd float %205, %206 > %208 = fadd float %207, %51 > %209 
= fmul float %52, %196 > %210 = fmul float %53, %199 > %211 = fadd float %209, %210 > %212 = fmul float %54, %202 > %213 = fadd float %211, %212 > %214 = fadd float %213, %55 > %215 = fmul float %56, %196 > %216 = fmul float %57, %199 > %217 = fadd float %215, %216 > %218 = fmul float %58, %202 > %219 = fadd float %217, %218 > %220 = fadd float %219, %59 > %221 = fmul float %32, %208 > %222 = fmul float %33, %214 > %223 = fadd float %221, %222 > %224 = fmul float %34, %220 > %225 = fadd float %223, %224 > %226 = fadd float %225, %35 > %227 = fmul float %36, %208 > %228 = fmul float %37, %214 > %229 = fadd float %227, %228 > %230 = fmul float %38, %220 > %231 = fadd float %229, %230 > %232 = fadd float %231, %39 > %233 = fmul float %40, %208 > %234 = fmul float %41, %214 > %235 = fadd float %233, %234 > %236 = fmul float %42, %220 > %237 = fadd float %235, %236 > %238 = fadd float %237, %43 > %239 = fmul float %44, %208 > %240 = fmul float %45, %214 > %241 = fadd float %239, %240 > %242 = fmul float %46, %220 > %243 = fadd float %241, %242 > %244 = fadd float %243, %47 > %245 = shl i32 %161, 4 > %246 = or i32 %245, 8 > %247 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %246) > %248 = shl i32 %161, 4 > %249 = or i32 %248, 12 > %250 = call float @llvm.SI.load.const(<16 x i8> %16, i32 %249) > %251 = icmp ult i32 %.0, 5 > %252 = select i1 %251, float 1.000000e+00, float -1.000000e+00 > call void @llvm.AMDGPU.kill(float %252) > %253 = shl i32 %.0, 2 > %254 = bitcast float %226 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %254, i32 1, i32 %253, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %255 = shl i32 %.0, 2 > %256 = add i32 %255, 16 > %257 = bitcast float %232 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %257, i32 1, i32 %256, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %258 = shl i32 %.0, 2 > %259 = add i32 %258, 32 > %260 = bitcast float %238 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %260, i32 1, i32 %259, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %261 = shl i32 %.0, 2 > %262 = add i32 %261, 48 > %263 = bitcast float %244 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %263, i32 1, i32 %262, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %264 = shl i32 %.0, 2 > %265 = add i32 %264, 64 > %266 = bitcast float %247 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %266, i32 1, i32 %265, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %267 = shl i32 %.0, 2 > %268 = add i32 %267, 80 > %269 = bitcast float %250 to i32 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %269, i32 1, i32 %268, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %270 = shl i32 %.0, 2 > %271 = add i32 %270, 96 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 undef, i32 1, i32 %271, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %272 = shl i32 %.0, 2 > %273 = add i32 %272, 112 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 undef, i32 1, i32 %273, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %274 = shl i32 %.0, 2 > %275 = add i32 %274, 128 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %153, i32 1, i32 %275, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %276 = shl i32 %.0, 2 > %277 = add i32 %276, 144 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %154, i32 1, i32 %277, i32 %5, i32 0, i32 4, i32 4, 
i32 1, i32 0, i32 1, i32 1, i32 0) > %278 = shl i32 %.0, 2 > %279 = add i32 %278, 160 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %155, i32 1, i32 %279, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %280 = shl i32 %.0, 2 > %281 = add i32 %280, 176 > call void @llvm.SI.tbuffer.store.i32(<16 x i8> %63, i32 %156, i32 1, i32 %281, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) > %282 = add i32 %.0, 1 > call void @llvm.SI.sendmsg(i32 34, i32 %6) > %283 = add i32 %162, 1 > br label %LOOP >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readonly >declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.AMDGPU.kill(float) > >declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) > >; Function Attrs: nounwind >declare void @llvm.SI.sendmsg(i32, i32) #3 > >attributes #0 = { nounwind readnone } >attributes #1 = { nounwind readonly } >attributes #2 = { readnone } >attributes #3 = { nounwind } > >!0 = !{!"const", null, i32 1} > >radeonsi: Compiling shader 303 >GS Copy Shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, i32, i32, i32, i32) { >main_body: > %5 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %0, i64 0, i64 7 > %6 = load <16 x i8>, <16 x i8> addrspace(2)* %5, align 16, !tbaa !0 > %7 = shl i32 %1, 2 > %8 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %9 = bitcast i32 %8 to float > %10 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 256, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %11 = bitcast i32 %10 to float > %12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 512, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %13 = bitcast i32 %12 to float > %14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 768, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %15 = bitcast i32 %14 to float > %16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 1024, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %17 = bitcast i32 %16 to float > %18 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 1280, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %19 = bitcast i32 %18 to float > %20 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 1536, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %21 = bitcast i32 %20 to float > %22 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 1792, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %23 = bitcast i32 %22 to float > %24 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 2048, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %25 = bitcast i32 %24 to float > %26 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 2304, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %27 = bitcast i32 %26 to float > %28 = call i32 
@llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 2560, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %29 = bitcast i32 %28 to float > %30 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %6, i32 %7, i32 2816, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > %31 = bitcast i32 %30 to float > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %19, float %21, float %23) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %27, float %29, float %31) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %9, float %11, float %13, float %15) > ret void undef >} > >; Function Attrs: nounwind readonly >declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readonly } > >!0 = !{!"const", null, i32 1} > >GS Copy Shader: > >GS Copy Shader as VS: >Shader main disassembly: > s_load_dwordx4 s[0:3], s[0:1], 0x1c ; C080011C > v_lshlrev_b32_e32 v0, 2, v0 ; 34000082 > s_movk_i32 s4, 0x100 ; B0040100 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_dword v1, v0, s[0:3], 0 offen glc slc ; E0305000 80400100 > buffer_load_dword v2, v0, s[0:3], s4 offen glc slc ; E0305000 04400200 > s_movk_i32 s4, 0x200 ; B0040200 > buffer_load_dword v3, v0, s[0:3], s4 offen glc slc ; E0305000 04400300 > s_movk_i32 s4, 0x300 ; B0040300 > buffer_load_dword v4, v0, s[0:3], s4 offen glc slc ; E0305000 04400400 > s_movk_i32 s4, 0x400 ; B0040400 > buffer_load_dword v5, v0, s[0:3], s4 offen glc slc ; E0305000 04400500 > s_movk_i32 s4, 0x500 ; B0040500 > buffer_load_dword v6, v0, s[0:3], s4 offen glc slc ; E0305000 04400600 > s_movk_i32 s4, 0x600 ; B0040600 > buffer_load_dword v7, v0, s[0:3], s4 offen glc slc ; E0305000 04400700 > s_movk_i32 s4, 0x700 ; B0040700 > buffer_load_dword v8, v0, s[0:3], s4 offen glc slc ; E0305000 04400800 > s_movk_i32 s4, 0x800 ; B0040800 > buffer_load_dword v9, v0, s[0:3], s4 offen glc slc ; E0305000 04400900 > s_movk_i32 s4, 0x900 ; B0040900 > buffer_load_dword v10, v0, s[0:3], s4 offen glc slc ; E0305000 04400A00 > s_movk_i32 s4, 0xa00 ; B0040A00 > buffer_load_dword v11, v0, s[0:3], s4 offen glc slc ; E0305000 04400B00 > s_movk_i32 s4, 0xb00 ; B0040B00 > buffer_load_dword v0, v0, s[0:3], s4 offen glc slc ; E0305000 04400000 > s_waitcnt vmcnt(4) ; BF8C0F74 > exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 33, 0, 0, 0, v9, v10, v11, v0 ; F800021F 000B0A09 > exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 8 >VGPRS: 12 >Code Size: 188 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Geometry Shader: >Shader main disassembly: > s_load_dwordx4 s[4:7], s[0:1], 0x8 ; C0820108 > v_lshlrev_b32_e32 v1, 2, v0 ; 34020082 > s_movk_i32 s8, 0x1500 ; B0081500 > s_movk_i32 s9, 0x1400 ; B0091400 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_dword v3, v1, s[4:7], s8 offen glc ; E0305000 08010301 > buffer_load_dword v1, v1, s[4:7], s9 offen glc ; E0305000 09010101 > s_buffer_load_dword s16, s[12:15], 0x60 ; C2080D60 > s_buffer_load_dword s9, s[12:15], 0x5f ; C2048D5F > s_buffer_load_dword s40, s[12:15], 0x62 ; C2140D62 > s_buffer_load_dword s8, s[12:15], 0x63 ; C2040D63 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_eq_f32_e64 s[18:19], 0, s9 ; D0040012 00001280 > s_and_b64 vcc, exec, 
s[18:19] ; 87EA127E > s_waitcnt vmcnt(1) ; BF8C0F71 > v_subrev_f32_e32 v2, s16, v3 ; 0A040610 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v4, v1, v1 ; 10080301 > v_mad_f32 v5, v2, v2, v4 ; D2820005 04120502 > v_mac_f32_e32 v4, v3, v3 ; 3E080703 > v_rsq_clamp_f32_e32 v3, v5 ; 7E065905 > v_sqrt_f32_e32 v4, v4 ; 7E086704 > v_subrev_f32_e32 v4, s16, v4 ; 0A080810 > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v5, s9 ; 7E0A5409 > v_mul_f32_e32 v4, v5, v4 ; 10080905 > s_branch BB0_3 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v4 ; 7C020880 > v_cndmask_b32_e64 v4, v4, 1.0, vcc ; D2000004 01A9E504 > v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880 > v_mul_f32_e32 v4, 0x70000000, v4 ; 100808FF 70000000 > v_bfrev_b32_e32 v5, 15 ; 7E0A708F > v_cndmask_b32_e32 v4, v5, v4 ; 00080905 > s_load_dwordx4 s[44:47], s[2:3], 0x8 ; C0960308 > s_buffer_load_dword s41, s[12:15], 0x28 ; C2148D28 > s_buffer_load_dword s42, s[12:15], 0x29 ; C2150D29 > s_buffer_load_dword s43, s[12:15], 0x2a ; C2158D2A > s_buffer_load_dword s48, s[12:15], 0x2b ; C2180D2B > s_buffer_load_dword s49, s[12:15], 0x61 ; C2188D61 > v_cmp_eq_f32_e64 s[16:17], 0, s8 ; D0040010 00001080 > v_add_f32_e64 v6, 0, v4 clamp ; D2060806 00020880 > s_and_b64 vcc, exec, s[16:17] ; 87EA107E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_5 ; BF870000 > v_lshlrev_b32_e32 v4, 2, v0 ; 34080082 > s_movk_i32 s9, 0x1800 ; B0091800 > buffer_load_dword v4, v4, s[4:7], s9 offen glc ; E0305000 09010404 > v_rcp_f32_e32 v5, s8 ; 7E0A5408 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v4, v5, v4 ; 10080905 > s_branch BB0_6 ; BF820000 > v_lshlrev_b32_e32 v4, 2, v0 ; 34080082 > s_movk_i32 s8, 0x1800 ; B0081800 > buffer_load_dword v4, v4, s[4:7], s8 offen glc ; E0305000 08010404 > v_bfrev_b32_e32 v5, 15 ; 7E0A708F > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_lt_f32_e32 vcc, 0, v4 ; 7C020880 > v_cndmask_b32_e64 v4, v4, 1.0, vcc ; D2000004 01A9E504 > v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880 > v_mul_f32_e32 v4, 0x70000000, v4 ; 100808FF 70000000 > v_cndmask_b32_e32 v4, v5, v4 ; 00080905 > v_lshlrev_b32_e32 v5, 2, v0 ; 340A0082 > s_movk_i32 s8, 0x1800 ; B0081800 > buffer_load_dword v7, v5, s[4:7], s8 offen glc ; E0305000 08010705 > s_movk_i32 s8, 0x1900 ; B0081900 > buffer_load_dword v5, v5, s[4:7], s8 offen glc ; E0305000 08010505 > s_buffer_load_dword s8, s[12:15], 0x5c ; C2040D5C > s_buffer_load_dword s9, s[12:15], 0x5d ; C2048D5D > s_buffer_load_dword s12, s[44:47], 0x0 ; C2062D00 > s_buffer_load_dword s13, s[44:47], 0x1 ; C206AD01 > s_buffer_load_dword s14, s[44:47], 0x2 ; C2072D02 > s_buffer_load_dword s15, s[44:47], 0x3 ; C207AD03 > s_buffer_load_dword s16, s[44:47], 0x4 ; C2082D04 > s_buffer_load_dword s17, s[44:47], 0x5 ; C208AD05 > s_buffer_load_dword s18, s[44:47], 0x6 ; C2092D06 > s_buffer_load_dword s19, s[44:47], 0x7 ; C209AD07 > s_buffer_load_dword s20, s[44:47], 0x8 ; C20A2D08 > s_buffer_load_dword s21, s[44:47], 0x9 ; C20AAD09 > s_buffer_load_dword s22, s[44:47], 0xa ; C20B2D0A > s_buffer_load_dword s23, s[44:47], 0xb ; C20BAD0B > s_buffer_load_dword s24, s[44:47], 0xc ; C20C2D0C > s_buffer_load_dword s25, s[44:47], 0xd ; C20CAD0D > s_buffer_load_dword s26, s[44:47], 0xe ; C20D2D0E > s_buffer_load_dword s27, s[44:47], 0xf ; C20DAD0F > s_buffer_load_dword s28, s[44:47], 0x10 ; C20E2D10 > s_buffer_load_dword s29, s[44:47], 0x11 ; C20EAD11 > s_buffer_load_dword s30, s[44:47], 0x12 ; C20F2D12 > s_buffer_load_dword s31, s[44:47], 0x13 ; C20FAD13 > s_buffer_load_dword s32, s[44:47], 0x14 ; C2102D14 > s_buffer_load_dword s33, s[44:47], 
0x15 ; C210AD15 > s_buffer_load_dword s34, s[44:47], 0x16 ; C2112D16 > s_buffer_load_dword s35, s[44:47], 0x17 ; C211AD17 > s_buffer_load_dword s36, s[44:47], 0x18 ; C2122D18 > s_buffer_load_dword s37, s[44:47], 0x19 ; C212AD19 > s_buffer_load_dword s38, s[44:47], 0x1a ; C2132D1A > s_buffer_load_dword s39, s[44:47], 0x1b ; C213AD1B > v_cmp_eq_f32_e64 s[44:45], 0, s40 ; D004002C 00005080 > v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 > s_and_b64 vcc, exec, s[44:45] ; 87EA2C7E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v5, v7, v5 ; 0A0A0B07 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_8 ; BF870000 > v_rcp_f32_e32 v7, s40 ; 7E0E5428 > v_mul_f32_e32 v5, v7, v5 ; 100A0B07 > s_branch BB0_9 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v5 ; 7C020A80 > v_cndmask_b32_e64 v5, v5, 1.0, vcc ; D2000005 01A9E505 > v_cmp_le_f32_e32 vcc, 0, v5 ; 7C060A80 > v_mul_f32_e32 v5, 0x70000000, v5 ; 100A0AFF 70000000 > v_bfrev_b32_e32 v7, 15 ; 7E0E708F > v_cndmask_b32_e32 v5, v7, v5 ; 000A0B07 > v_lshlrev_b32_e32 v0, 2, v0 ; 34000082 > s_movk_i32 s44, 0x1c00 ; B02C1C00 > buffer_load_dword v9, v0, s[4:7], s44 offen glc ; E0305000 2C010900 > s_movk_i32 s44, 0x1d00 ; B02C1D00 > buffer_load_dword v10, v0, s[4:7], s44 offen glc ; E0305000 2C010A00 > s_movk_i32 s44, 0x1e00 ; B02C1E00 > buffer_load_dword v11, v0, s[4:7], s44 offen glc ; E0305000 2C010B00 > s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 > s_load_dwordx4 s[0:3], s[0:1], 0xc ; C080010C > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > v_cmp_lt_f32_e64 vcc, 0, s40 ; D002006A 00005080 > v_mul_f32_e32 v5, v5, v4 ; 100A0905 > v_mul_f32_e32 v2, v2, v3 ; 10040702 > v_mul_f32_e32 v1, v1, v3 ; 10020701 > v_cndmask_b32_e32 v8, v4, v5 ; 00100B04 > v_mul_f32_e32 v5, s49, v1 ; 100A0231 > v_mul_f32_e32 v7, s49, v2 ; 100E0431 > v_mul_f32_e32 v5, v5, v6 ; 100A0D05 > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > v_mov_b32_e32 v3, 0 ; 7E060280 > v_mov_b32_e32 v4, 0xc4 ; 7E0802FF 000000C4 > v_mov_b32_e32 v7, 0xb0 ; 7E0E02FF 000000B0 > v_mul_f32_e32 v8, s48, v8 ; 10101030 > s_movk_i32 s40, 0xffc0 ; B028FFC0 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v9, s41, v9 ; 10121229 > s_movk_i32 s41, 0x1000 ; B0291000 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v10, s42, v10 ; 1014142A > s_movk_i32 s42, 0x1100 ; B02A1100 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v11, s43, v11 ; 1016162B > s_movk_i32 s43, 0x1200 ; B02B1200 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_gt_u32_e32 vcc, 4, v3 ; 7D880684 > s_and_b64 vcc, exec, vcc ; 87EA6A7E > s_cbranch_vccnz BB0_12 ; BF870000 > s_branch BB0_11 ; BF820000 > v_add_i32_e32 v16, vcc, s40, v4 ; 4A200828 > v_cmp_gt_u32_e32 vcc, 5, v3 ; 7D880685 > v_cndmask_b32_e64 v17, -1.0, 1.0, vcc ; D2000011 01A9E4F3 > v_add_i32_e32 v18, vcc, -4, v4 ; 4A2408C4 > buffer_load_dword v18, v18, s[44:47], 0 offen ; E0301000 800B1212 > v_add_i32_e32 v20, vcc, 0xffffff88, v4 ; 4A2808FF FFFFFF88 > buffer_load_dword v16, v16, s[44:47], 0 offen ; E0301000 800B1010 > v_add_i32_e32 v19, vcc, 0xffffffbc, v4 ; 4A2608FF FFFFFFBC > buffer_load_dword v12, v4, s[44:47], 0 offen ; E0301000 800B0C04 > buffer_load_dword v20, v20, s[44:47], 0 offen ; E0301000 800B1414 > buffer_load_dword v19, v19, s[44:47], 0 offen ; E0301000 800B1313 > buffer_load_dword v13, v0, s[4:7], s41 offen glc ; E0305000 29010D00 > buffer_load_dword v14, v0, s[4:7], s42 offen glc ; E0305000 2A010E00 > buffer_load_dword v15, v0, s[4:7], s43 offen glc ; E0305000 2B010F00 > v_add_i32_e32 v21, vcc, 0xffffff44, v4 ; 4A2A08FF FFFFFF44 > v_add_i32_e32 v22, 
vcc, 0xffffff48, v4 ; 4A2C08FF FFFFFF48 > buffer_load_dword v21, v21, s[44:47], 0 offen ; E0301000 800B1515 > buffer_load_dword v22, v22, s[44:47], 0 offen ; E0301000 800B1616 > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mul_f32_e32 v18, v18, v2 ; 10240512 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v16, v16, v2 ; 10200510 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mad_f32 v12, v1, v12, -v18 ; D282000C 844A1901 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_mul_f32_e32 v18, v20, v5 ; 10240B14 > v_fma_f32 v12, s8, v12, -v18 ; D296000C 844A1808 > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v16, v19, v1 ; 3E200313 > v_mul_f32_e32 v18, v20, v6 ; 10240D14 > v_fma_f32 v16, s9, v16, -v18 ; D2960010 844A2009 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_add_f32_e32 v12, v13, v12 ; 0618190D > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v13, v14, v16 ; 061A210E > v_mul_f32_e32 v14, s29, v13 ; 101C1A1D > v_mul_f32_e32 v16, s33, v13 ; 10201A21 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_add_f32_e32 v15, 0, v15 ; 061E1E80 > v_mac_f32_e32 v14, s28, v12 ; 3E1C181C > v_mac_f32_e32 v16, s32, v12 ; 3E201820 > v_mul_f32_e32 v13, s37, v13 ; 101A1A25 > v_mac_f32_e32 v14, s30, v15 ; 3E1C1E1E > v_mac_f32_e32 v16, s34, v15 ; 3E201E22 > v_mac_f32_e32 v13, s36, v12 ; 3E1A1824 > v_add_f32_e32 v12, s31, v14 ; 06181C1F > v_add_f32_e32 v14, s35, v16 ; 061C2023 > v_mul_f32_e32 v16, s17, v14 ; 10201C11 > v_mul_f32_e32 v18, s21, v14 ; 10241C15 > v_mac_f32_e32 v13, s38, v15 ; 3E1A1E26 > v_mul_f32_e32 v15, s13, v14 ; 101E1C0D > v_mul_f32_e32 v14, s25, v14 ; 101C1C19 > v_mac_f32_e32 v15, s12, v12 ; 3E1E180C > v_mac_f32_e32 v16, s16, v12 ; 3E201810 > v_mac_f32_e32 v18, s20, v12 ; 3E241814 > v_mac_f32_e32 v14, s24, v12 ; 3E1C1818 > v_add_f32_e32 v12, s39, v13 ; 06181A27 > v_mac_f32_e32 v14, s26, v12 ; 3E1C181A > v_mac_f32_e32 v16, s18, v12 ; 3E201812 > v_mac_f32_e32 v15, s14, v12 ; 3E1E180E > v_mac_f32_e32 v18, s22, v12 ; 3E241816 > v_add_f32_e32 v12, s15, v15 ; 06181E0F > v_add_f32_e32 v13, s19, v16 ; 061A2013 > v_add_f32_e32 v15, s23, v18 ; 061E2417 > v_add_f32_e32 v14, s27, v14 ; 061C1C1B > v_cmpx_le_f32_e32 vcc, 0, v17 ; 7C262280 > v_add_i32_e32 v16, vcc, 0xffffff50, v7 ; 4A200EFF FFFFFF50 > tbuffer_store_format_x v12, 0x0, -1, 0, -1, 0, 4, 4, v16, s[0:3], -1, 0, s10 ; EA245000 0A400C10 > s_waitcnt vmcnt(0) expcnt(0) ; BF8C0F00 > v_add_i32_e32 v12, vcc, 0xffffff60, v7 ; 4A180EFF FFFFFF60 > tbuffer_store_format_x v13, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A400D0C > v_add_i32_e32 v12, vcc, 0xffffff70, v7 ; 4A180EFF FFFFFF70 > tbuffer_store_format_x v15, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A400F0C > v_add_i32_e32 v12, vcc, 0xffffff80, v7 ; 4A180EFF FFFFFF80 > tbuffer_store_format_x v14, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A400E0C > v_add_i32_e32 v12, vcc, 0xffffff90, v7 ; 4A180EFF FFFFFF90 > tbuffer_store_format_x v21, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A40150C > v_add_i32_e32 v12, vcc, 0xffffffa0, v7 ; 4A180EFF FFFFFFA0 > tbuffer_store_format_x v22, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A40160C > v_add_i32_e32 v12, vcc, 0xffffffb0, v7 ; 4A180EFF FFFFFFB0 > tbuffer_store_format_x v0, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A40000C > v_add_i32_e32 v12, vcc, s40, v7 ; 4A180E28 > tbuffer_store_format_x v0, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A40000C > v_add_i32_e32 v12, vcc, 0xffffffd0, v7 ; 4A180EFF FFFFFFD0 > tbuffer_store_format_x v9, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 
0A40090C > v_add_i32_e32 v12, vcc, 0xffffffe0, v7 ; 4A180EFF FFFFFFE0 > tbuffer_store_format_x v10, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A400A0C > v_add_i32_e32 v12, vcc, -16, v7 ; 4A180ED0 > s_mov_b32 m0, s11 ; BEFC030B > tbuffer_store_format_x v11, 0x0, -1, 0, -1, 0, 4, 4, v12, s[0:3], -1, 0, s10 ; EA245000 0A400B0C > tbuffer_store_format_x v8, 0x0, -1, 0, -1, 0, 4, 4, v7, s[0:3], -1, 0, s10 ; EA245000 0A400807 > s_waitcnt vmcnt(0) expcnt(0) ; BF8C0F00 > s_sendmsg Gs(emit stream 0), [m0] ; BF900022 > v_add_i32_e32 v3, vcc, 1, v3 ; 4A060681 > v_add_i32_e32 v4, vcc, 16, v4 ; 4A080890 > v_add_i32_e32 v7, vcc, 4, v7 ; 4A0E0E84 > s_branch BB0_10 ; BF820000 > s_mov_b32 m0, s11 ; BEFC030B > s_sendmsg Gs(cut stream 0), [m0] ; BF900012 > s_sendmsg Gs_done(nop), [m0] ; BF900003 > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 24 >Code Size: 1272 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..27] >DCL CONST[2][0..25] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {1, 352, 160, 0} >IMM[1] UINT32 {432, 400, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[2][22].xyzz > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[2][10].xyzz > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[2][10].wwww > 5: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].wwww > 6: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][27].xyzz > 7: MOV TEMP[0].xyz, TEMP[0].xyzx > 8: MUL TEMP[1].x, TEMP[1].xxxx, CONST[2][25].xxxx > 9: MOV TEMP[0].w, TEMP[1].xxxx > 10: MOV OUT[0], TEMP[0] > 11: END >radeonsi: Compiling shader 304 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 > %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 160) > %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 164) > %32 = call float @llvm.SI.load.const(<16 x i8> %29, i32 168) > %33 = call float @llvm.SI.load.const(<16 x i8> %29, i32 172) > %34 = call float @llvm.SI.load.const(<16 x i8> %29, i32 352) > %35 = call float 
@llvm.SI.load.const(<16 x i8> %29, i32 356) > %36 = call float @llvm.SI.load.const(<16 x i8> %29, i32 360) > %37 = call float @llvm.SI.load.const(<16 x i8> %29, i32 400) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %50 = bitcast float %47 to i32 > %51 = bitcast float %48 to i32 > %52 = insertelement <2 x i32> undef, i32 %50, i32 0 > %53 = insertelement <2 x i32> %52, i32 %51, i32 1 > %54 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %53, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = fmul float %55, %34 > %59 = fmul float %56, %35 > %60 = fmul float %57, %36 > %61 = fmul float %58, %30 > %62 = fmul float %59, %31 > %63 = fmul float %60, %32 > %64 = fmul float %61, %33 > %65 = fmul float %62, %33 > %66 = fmul float %63, %33 > %67 = fmul float %64, %49 > %68 = fmul float %65, %49 > %69 = fmul float %66, %49 > %70 = fmul float %67, %25 > %71 = fmul float %68, %26 > %72 = fadd float %71, %70 > %73 = fmul float %69, %27 > %74 = fadd float %72, %73 > %75 = fmul float %74, %37 > %76 = bitcast float %5 to i32 > %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %76, 10 > %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %67, 11 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %68, 12 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %69, 13 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %75, 14 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone 
>declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..11] >DCL CONST[2][0..24] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, -0.1500, 0.0597} >IMM[1] UINT32 {0, 16, 32, 48} >IMM[2] UINT32 {128, 1, 288, 176} >IMM[3] UINT32 {112, 144, 320, 256} >IMM[4] FLT32 { -1.5000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[5] UINT32 {240, 304, 224, 272} >IMM[6] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[7] UINT32 {384, 160, 0, 0} >IMM[8] FLT32 { 1.7000, 0.0600, 1.0000, -1.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][0], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][1], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][2], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][3], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][8], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[2][18].wwww > 11: MOV TEMP[1], TEMP[1] > 12: MOV TEMP[3].xy, IN[1].xyxx > 13: ABS TEMP[4].x, TEMP[2].xxxx > 14: MUL TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy > 15: MIN TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx > 16: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[0].xxxx > 17: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][11].xyzz > 18: DP3 TEMP[6].x, CONST[1][7].xyzz, TEMP[5].xyzz > 19: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[5].xyzz > 20: MOV TEMP[6].z, TEMP[7].xxxx > 21: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[5].xyzz > 22: MOV TEMP[6].y, TEMP[7].xxxx > 23: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz > 24: RSQ TEMP[8].x, TEMP[8].xxxx > 25: MUL TEMP[5].xyz, TEMP[8].xxxx, TEMP[6].xyzz > 26: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz > 27: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 28: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 29: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 30: DP3 TEMP[8].x, -TEMP[5].xyzz, CONST[2][20].xyzz > 31: FMA TEMP[9].x, -CONST[2][16].yyyy, TEMP[8].xxxx, CONST[2][16].xxxx > 32: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx, IMM[0].xxxx > 33: MUL TEMP[8].x, TEMP[8].xxxx, IMM[0].wwww > 34: ABS TEMP[9].x, TEMP[9].xxxx > 35: LG2 TEMP[9].x, TEMP[9].xxxx > 36: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].xxxx > 37: EX2 TEMP[9].x, TEMP[9].xxxx > 38: FMA TEMP[5].x, CONST[2][16].zzzz, TEMP[9].xxxx, -CONST[2][15].zzzz > 39: MUL TEMP[9].x, TEMP[9].xxxx, CONST[2][16].zzzz > 40: MAX TEMP[10].x, TEMP[5].xxxx, IMM[4].yyyy > 41: FMA TEMP[4].x, -TEMP[10].xxxx, TEMP[4].xxxx, TEMP[9].xxxx > 42: MAX TEMP[4].x, TEMP[4].xxxx, CONST[2][19].wwww > 43: MAX TEMP[9].x, TEMP[0].xxxx, IMM[4].yyyy > 44: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[2][18].zzzz > 45: FSNE TEMP[10].x, CONST[2][15].yyyy, IMM[4].yyyy > 46: UIF TEMP[10].xxxx :0 > 47: RCP TEMP[10].x, CONST[2][15].yyyy > 48: MUL TEMP[10].x, TEMP[0].xxxx, TEMP[10].xxxx > 49: ELSE :0 > 50: SSG TEMP[11].x, TEMP[0].xxxx > 51: MUL TEMP[10].x, IMM[4].zzzz, TEMP[11].xxxx > 52: ENDIF > 53: FSNE TEMP[11].x, CONST[2][15].xxxx, IMM[4].yyyy > 54: UIF TEMP[11].xxxx :0 > 55: RCP TEMP[11].x, CONST[2][15].xxxx > 56: MUL TEMP[11].x, -TEMP[9].xxxx, TEMP[11].xxxx > 57: ELSE :0 > 58: SSG TEMP[12].x, -TEMP[9].xxxx > 59: MUL 
TEMP[11].x, IMM[4].zzzz, TEMP[12].xxxx > 60: ENDIF > 61: FSNE TEMP[12].x, CONST[2][14].wwww, IMM[4].yyyy > 62: UIF TEMP[12].xxxx :0 > 63: RCP TEMP[12].x, CONST[2][14].wwww > 64: MUL TEMP[12].x, -TEMP[9].xxxx, TEMP[12].xxxx > 65: ELSE :0 > 66: SSG TEMP[9].x, -TEMP[9].xxxx > 67: MUL TEMP[12].x, IMM[4].zzzz, TEMP[9].xxxx > 68: ENDIF > 69: MUL TEMP[9].x, TEMP[12].xxxx, IMM[4].wwww > 70: EX2 TEMP[9].x, TEMP[9].xxxx > 71: MUL TEMP[5].x, TEMP[11].xxxx, IMM[4].wwww > 72: EX2 TEMP[11].x, TEMP[5].xxxx > 73: ADD TEMP[5].x, TEMP[11].xxxx, CONST[2][16].wwww > 74: MUL TEMP[5].x, TEMP[5].xxxx, CONST[2][17].yyyy > 75: MUL TEMP[5].x, TEMP[5].xxxx, IMM[6].xxxx > 76: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx > 77: MIN TEMP[7].x, TEMP[7].xxxx, CONST[2][15].wwww > 78: MAX TEMP[7].x, TEMP[7].xxxx, CONST[2][17].xxxx > 79: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[4].xxxx > 80: FMA TEMP[7].xyz, CONST[2][14].xyzz, TEMP[9].xxxx, TEMP[7].xxxx > 81: MUL TEMP[6].xyz, TEMP[9].xxxx, CONST[2][14].xyzz > 82: FMA TEMP[4].xyz, TEMP[6].xyzz, TEMP[8].xxxx, TEMP[4].xxxx > 83: FSEQ TEMP[8].xyz, TEMP[7].xyzz, IMM[4].yyyy > 84: SSG TEMP[9].xyz, TEMP[4].xyzz > 85: MUL TEMP[9].xyz, IMM[4].zzzz, TEMP[9].xyzz > 86: RCP TEMP[11].x, TEMP[7].xxxx > 87: RCP TEMP[11].y, TEMP[7].yyyy > 88: RCP TEMP[11].z, TEMP[7].zzzz > 89: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[11].xyzz > 90: UCMP TEMP[4].xyz, TEMP[8].xyzz, TEMP[9].xyzz, TEMP[4].xyzz > 91: MUL TEMP[6].xyz, TEMP[10].xxxx, -TEMP[7].xyzz > 92: ABS TEMP[2].xyz, TEMP[2].xxxx > 93: MUL TEMP[5].xyz, TEMP[2].xyzz, -TEMP[7].xyzz > 94: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[4].wwww > 95: EX2 TEMP[2].x, TEMP[5].xxxx > 96: EX2 TEMP[2].y, TEMP[5].yyyy > 97: EX2 TEMP[2].z, TEMP[5].zzzz > 98: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[4].wwww > 99: LG2 TEMP[7].x, CONST[2][19].xxxx >100: LG2 TEMP[7].y, CONST[2][19].yyyy >101: LG2 TEMP[7].z, CONST[2][19].zzzz >102: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[6].yyyy >103: EX2 TEMP[8].x, TEMP[7].xxxx >104: EX2 TEMP[8].y, TEMP[7].yyyy >105: EX2 TEMP[8].z, TEMP[7].zzzz >106: EX2 TEMP[7].x, TEMP[6].xxxx >107: EX2 TEMP[7].y, TEMP[6].yyyy >108: EX2 TEMP[7].z, TEMP[6].zzzz >109: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[8].xyzz >110: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[6].xyzz >111: ADD TEMP[4].xyz, -TEMP[2].xyzz, IMM[0].xxxx >112: MOV TEMP[2].w, TEMP[2].xxxx >113: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz, IMM[6].zzzz >114: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >115: FMA TEMP[4].xyz, TEMP[0].xyzz, IMM[6].wwww, IMM[6].xxxx >116: MUL TEMP[5].xyz, TEMP[0].xyzz, TEMP[4].xyzz >117: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[6].wwww, IMM[8].xxxx >118: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].yyyy >119: FSEQ TEMP[4].xyz, TEMP[0].xyzz, IMM[4].yyyy >120: SSG TEMP[6].xyz, TEMP[5].xyzz >121: MUL TEMP[6].xyz, IMM[4].zzzz, TEMP[6].xyzz >122: RCP TEMP[7].x, TEMP[0].xxxx >123: RCP TEMP[7].y, TEMP[0].yyyy >124: RCP TEMP[7].z, TEMP[0].zzzz >125: MUL TEMP[0].xyz, TEMP[5].xyzz, TEMP[7].xyzz >126: UCMP TEMP[2].xyz, TEMP[4].xyzz, TEMP[6].xyzz, TEMP[0].xyzz >127: MUL TEMP[0], IN[2], CONST[1][10] >128: MOV OUT[3], TEMP[0] >129: MOV OUT[2], TEMP[2] >130: MOV OUT[1], TEMP[3] >131: MOV OUT[0], TEMP[1] >132: END >radeonsi: Compiling shader 305 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, 
i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 0) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 4) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 8) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 12) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 16) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 20) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 24) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 28) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 32) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 36) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 40) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 44) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 48) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 52) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 56) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 60) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 112) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 116) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 120) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 128) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 132) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 136) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 140) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 144) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 148) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 152) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 160) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 164) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 168) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 172) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 176) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 180) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 184) > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call float @llvm.SI.load.const(<16 x i8> %52, i32 224) > %54 = call float @llvm.SI.load.const(<16 x i8> %52, i32 228) > %55 = call float @llvm.SI.load.const(<16 x i8> %52, i32 232) > %56 = call float @llvm.SI.load.const(<16 x i8> %52, i32 236) > %57 = call float @llvm.SI.load.const(<16 x i8> %52, i32 240) > %58 = call float @llvm.SI.load.const(<16 x i8> %52, i32 244) > %59 = call float @llvm.SI.load.const(<16 x i8> %52, i32 248) > %60 = call float @llvm.SI.load.const(<16 x i8> %52, i32 252) > %61 = call float @llvm.SI.load.const(<16 x i8> %52, i32 256) > %62 = call float @llvm.SI.load.const(<16 x i8> %52, i32 260) > %63 = call float @llvm.SI.load.const(<16 x i8> %52, i32 264) > %64 = call float @llvm.SI.load.const(<16 x i8> %52, i32 268) > %65 = call float @llvm.SI.load.const(<16 x i8> %52, i32 272) > %66 = call float @llvm.SI.load.const(<16 x i8> %52, i32 276) > %67 = call float @llvm.SI.load.const(<16 x i8> %52, i32 296) > %68 = call float @llvm.SI.load.const(<16 x i8> %52, i32 300) > %69 = call float @llvm.SI.load.const(<16 x i8> %52, i32 304) > %70 = call float 
@llvm.SI.load.const(<16 x i8> %52, i32 308) > %71 = call float @llvm.SI.load.const(<16 x i8> %52, i32 312) > %72 = call float @llvm.SI.load.const(<16 x i8> %52, i32 316) > %73 = call float @llvm.SI.load.const(<16 x i8> %52, i32 320) > %74 = call float @llvm.SI.load.const(<16 x i8> %52, i32 324) > %75 = call float @llvm.SI.load.const(<16 x i8> %52, i32 328) > %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 > %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %13) > %79 = extractelement <4 x float> %78, i32 0 > %80 = extractelement <4 x float> %78, i32 1 > %81 = extractelement <4 x float> %78, i32 2 > %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 > %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %14) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %15) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = extractelement <4 x float> %89, i32 2 > %93 = extractelement <4 x float> %89, i32 3 > %94 = fmul float %18, %79 > %95 = fmul float %19, %80 > %96 = fadd float %94, %95 > %97 = fmul float %20, %81 > %98 = fadd float %96, %97 > %99 = fadd float %98, %21 > %100 = fmul float %22, %79 > %101 = fmul float %23, %80 > %102 = fadd float %100, %101 > %103 = fmul float %24, %81 > %104 = fadd float %102, %103 > %105 = fadd float %104, %25 > %106 = fmul float %26, %79 > %107 = fmul float %27, %80 > %108 = fadd float %106, %107 > %109 = fmul float %28, %81 > %110 = fadd float %108, %109 > %111 = fadd float %110, %29 > %112 = fmul float %30, %79 > %113 = fmul float %31, %80 > %114 = fadd float %112, %113 > %115 = fmul float %32, %81 > %116 = fadd float %114, %115 > %117 = fadd float %116, %33 > %118 = fmul float %37, %79 > %119 = fmul float %38, %80 > %120 = fadd float %118, %119 > %121 = fmul float %39, %81 > %122 = fadd float %120, %121 > %123 = fadd float %122, %40 > %124 = fadd float %123, %68 > %125 = call float @llvm.fabs.f32(float %117) > %126 = fmul float %125, 0x3EF4F8B580000000 > %127 = call float @llvm.minnum.f32(float %126, float 1.000000e+00) > %128 = fsub float 1.000000e+00, %127 > %129 = fsub float %48, %79 > %130 = fsub float %49, %80 > %131 = fsub float %50, %81 > %132 = fmul float %34, %129 > %133 = fmul float %35, %130 > %134 = fadd float %133, %132 > %135 = fmul float %36, %131 > %136 = fadd float %134, %135 > %137 = fmul float %41, %129 > %138 = fmul float %42, %130 > %139 = fadd float %138, %137 > %140 = fmul float %43, %131 > %141 = fadd float %139, %140 > %142 = fmul float %37, %129 > %143 = fmul float %38, %130 > %144 = fadd float %143, %142 > %145 = fmul float %39, %131 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, 
float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %73 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %74 > %164 = fsub float %162, %163 > %165 = fmul float %155, %75 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %62 > %168 = call float @llvm.fma.f32(float %167, float %166, float %61) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = fmul float %169, 0x3FAE8EC8A0000000 > %171 = call float @llvm.fabs.f32(float %168) > %172 = call float @llvm.log2.f32(float %171) > %173 = fmul float %172, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %59 > %176 = call float @llvm.fma.f32(float %63, float %174, float %175) > %177 = fmul float %174, %63 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = fsub float -0.000000e+00, %178 > %180 = call float @llvm.fma.f32(float %179, float %128, float %177) > %181 = call float @llvm.maxnum.f32(float %180, float %72) > %182 = call float @llvm.maxnum.f32(float %124, float 0.000000e+00) > %183 = fsub float %67, %124 > %184 = fcmp une float %58, 0.000000e+00 > br i1 %184, label %IF, label %ELSE > >IF: ; preds = %main_body > %185 = fdiv float 1.000000e+00, %58 > %186 = fmul float %183, %185 > br label %ENDIF > >ELSE: ; preds = %main_body > %187 = fcmp ogt float %183, 0.000000e+00 > %188 = select i1 %187, float 1.000000e+00, float %183 > %189 = fcmp oge float %188, 0.000000e+00 > %.op = fmul float %188, 0x4600000000000000 > %190 = select i1 %189, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %186, %IF ], [ %190, %ELSE ] > %191 = fcmp une float %57, 0.000000e+00 > br i1 %191, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %192 = fdiv float 1.000000e+00, %57 > %193 = fmul float %182, %192 > %194 = fsub float -0.000000e+00, %193 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %195 = fcmp ole float %182, -0.000000e+00 > %.op58 = fmul float %182, 0xC600000000000000 > %196 = select i1 %195, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp44.0 = phi float [ %194, %IF53 ], [ %196, %ELSE54 ] > %197 = fcmp une float %56, 0.000000e+00 > br i1 %197, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %198 = fdiv float 1.000000e+00, %56 > %199 = fmul float %182, %198 > %200 = fsub float -0.000000e+00, %199 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %201 = fcmp ole float %182, -0.000000e+00 > %.op59 = fmul float %182, 0xC600000000000000 > %202 = select i1 %201, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp48.0 = phi float [ %200, %IF56 ], [ %202, %ELSE57 ] > %203 = fmul float %temp48.0, 0x3FF7154760000000 > %204 = call float @llvm.exp2.f32(float %203) > %205 = fmul float %temp44.0, 0x3FF7154760000000 > %206 = call float @llvm.exp2.f32(float %205) > %207 = fadd float %206, %64 > %208 = fmul float %207, %66 > %209 = fmul float %208, 5.000000e-01 > %210 = fmul float %160, %209 > %211 = call float @llvm.minnum.f32(float %210, float %60) > %212 = call float @llvm.maxnum.f32(float %211, float %65) > %213 = fmul float %212, %181 > %214 = call float @llvm.fma.f32(float %53, float %204, float %212) > %215 = call float @llvm.fma.f32(float %54, float %204, float %212) > %216 = call 
float @llvm.fma.f32(float %55, float %204, float %212) > %217 = fmul float %204, %53 > %218 = fmul float %204, %54 > %219 = fmul float %204, %55 > %220 = call float @llvm.fma.f32(float %217, float %170, float %213) > %221 = call float @llvm.fma.f32(float %218, float %170, float %213) > %222 = call float @llvm.fma.f32(float %219, float %170, float %213) > %223 = fcmp oeq float %214, 0.000000e+00 > %224 = fcmp oeq float %215, 0.000000e+00 > %225 = fcmp oeq float %216, 0.000000e+00 > %226 = fcmp ogt float %220, 0.000000e+00 > %227 = select i1 %226, float 1.000000e+00, float %220 > %228 = fcmp oge float %227, 0.000000e+00 > %229 = fcmp ogt float %221, 0.000000e+00 > %230 = select i1 %229, float 1.000000e+00, float %221 > %231 = fcmp oge float %230, 0.000000e+00 > %232 = fcmp ogt float %222, 0.000000e+00 > %233 = select i1 %232, float 1.000000e+00, float %222 > %234 = fcmp oge float %233, 0.000000e+00 > %.op60 = fmul float %227, 0x4600000000000000 > %235 = select i1 %228, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %230, 0x4600000000000000 > %236 = select i1 %231, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %233, 0x4600000000000000 > %237 = select i1 %234, float %.op62, float 0xC600000000000000 > %238 = fdiv float 1.000000e+00, %214 > %239 = fdiv float 1.000000e+00, %215 > %240 = fdiv float 1.000000e+00, %216 > %241 = fmul float %220, %238 > %242 = fmul float %221, %239 > %243 = fmul float %222, %240 > %244 = select i1 %223, float %235, float %241 > %245 = select i1 %224, float %236, float %242 > %246 = select i1 %225, float %237, float %243 > %247 = fmul float %214, %temp40.0 > %248 = fmul float %215, %temp40.0 > %249 = fmul float %216, %temp40.0 > %250 = call float @llvm.fabs.f32(float %117) > %251 = call float @llvm.fabs.f32(float %117) > %252 = call float @llvm.fabs.f32(float %117) > %253 = fmul float %214, %250 > %254 = fmul float %215, %251 > %255 = fmul float %216, %252 > %256 = fmul float %253, 0xBFF7154760000000 > %257 = fmul float %254, 0xBFF7154760000000 > %258 = fmul float %255, 0xBFF7154760000000 > %259 = call float @llvm.exp2.f32(float %256) > %260 = call float @llvm.exp2.f32(float %257) > %261 = call float @llvm.exp2.f32(float %258) > %262 = fmul float %247, 0xBFF7154760000000 > %263 = fmul float %248, 0xBFF7154760000000 > %264 = fmul float %249, 0xBFF7154760000000 > %265 = call float @llvm.log2.f32(float %69) > %266 = call float @llvm.log2.f32(float %70) > %267 = call float @llvm.log2.f32(float %71) > %268 = fmul float %265, 0x3FDD1745E0000000 > %269 = fmul float %266, 0x3FDD1745E0000000 > %270 = fmul float %267, 0x3FDD1745E0000000 > %271 = call float @llvm.exp2.f32(float %268) > %272 = call float @llvm.exp2.f32(float %269) > %273 = call float @llvm.exp2.f32(float %270) > %274 = call float @llvm.exp2.f32(float %262) > %275 = call float @llvm.exp2.f32(float %263) > %276 = call float @llvm.exp2.f32(float %264) > %277 = fmul float %274, %271 > %278 = fmul float %275, %272 > %279 = fmul float %276, %273 > %280 = fmul float %244, %277 > %281 = fmul float %245, %278 > %282 = fmul float %246, %279 > %283 = fsub float 1.000000e+00, %259 > %284 = fsub float 1.000000e+00, %260 > %285 = fsub float 1.000000e+00, %261 > %286 = call float @llvm.fma.f32(float %280, float %283, float 0xBF70624DE0000000) > %287 = call float @llvm.fma.f32(float %281, float %284, float 0xBF70624DE0000000) > %288 = call float @llvm.fma.f32(float %282, float %285, float 0xBF70624DE0000000) > %289 = call float @llvm.maxnum.f32(float %286, float 0.000000e+00) > %290 = call float 
@llvm.maxnum.f32(float %287, float 0.000000e+00) > %291 = call float @llvm.maxnum.f32(float %288, float 0.000000e+00) > %292 = call float @llvm.fma.f32(float %289, float 0x4018CCCCC0000000, float 5.000000e-01) > %293 = call float @llvm.fma.f32(float %290, float 0x4018CCCCC0000000, float 5.000000e-01) > %294 = call float @llvm.fma.f32(float %291, float 0x4018CCCCC0000000, float 5.000000e-01) > %295 = fmul float %289, %292 > %296 = fmul float %290, %293 > %297 = fmul float %291, %294 > %298 = call float @llvm.fma.f32(float %289, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %299 = call float @llvm.fma.f32(float %290, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %300 = call float @llvm.fma.f32(float %291, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %301 = call float @llvm.fma.f32(float %289, float %298, float 0x3FAEB851E0000000) > %302 = call float @llvm.fma.f32(float %290, float %299, float 0x3FAEB851E0000000) > %303 = call float @llvm.fma.f32(float %291, float %300, float 0x3FAEB851E0000000) > %304 = fcmp oeq float %301, 0.000000e+00 > %305 = fcmp oeq float %302, 0.000000e+00 > %306 = fcmp oeq float %303, 0.000000e+00 > %307 = fcmp ogt float %295, 0.000000e+00 > %308 = select i1 %307, float 1.000000e+00, float %295 > %309 = fcmp oge float %308, 0.000000e+00 > %310 = fcmp ogt float %296, 0.000000e+00 > %311 = select i1 %310, float 1.000000e+00, float %296 > %312 = fcmp oge float %311, 0.000000e+00 > %313 = fcmp ogt float %297, 0.000000e+00 > %314 = select i1 %313, float 1.000000e+00, float %297 > %315 = fcmp oge float %314, 0.000000e+00 > %.op63 = fmul float %308, 0x4600000000000000 > %316 = select i1 %309, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %311, 0x4600000000000000 > %317 = select i1 %312, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %314, 0x4600000000000000 > %318 = select i1 %315, float %.op65, float 0xC600000000000000 > %319 = fdiv float 1.000000e+00, %301 > %320 = fdiv float 1.000000e+00, %302 > %321 = fdiv float 1.000000e+00, %303 > %322 = fmul float %295, %319 > %323 = fmul float %296, %320 > %324 = fmul float %297, %321 > %325 = select i1 %304, float %316, float %322 > %326 = select i1 %305, float %317, float %323 > %327 = select i1 %306, float %318, float %324 > %328 = fmul float %90, %44 > %329 = fmul float %91, %45 > %330 = fmul float %92, %46 > %331 = fmul float %93, %47 > %332 = bitcast i32 %11 to float > %333 = insertvalue <{ float, float, float }> undef, float %332, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %85, float %86, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %325, float %326, float %327, float %259) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %328, float %329, float %330, float %331) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %99, float %105, float %111, float %117) > ret <{ float, float, float }> %333 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: 
readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], BUFFER, FLOAT >DCL CONST[1][0..27] >DCL CONST[2][0..22] >DCL CONST[3][0] >DCL TEMP[0..1], LOCAL >IMM[0] INT32 {0, 0, 0, 0} >IMM[1] UINT32 {0, 432, 1, 352} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[2] > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].wwww > 4: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 5: MOV TEMP[1].x, IMM[0].xxxx > 6: MOV TEMP[1].w, IMM[1].xxxx > 7: TXF TEMP[1].x, TEMP[1], SAMP[1], BUFFER > 8: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx > 9: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][27].xyzz > 10: MOV TEMP[0].xyz, TEMP[0].xyzx > 11: MUL TEMP[1].x, TEMP[1].xxxx, CONST[2][22].xxxx > 12: MOV TEMP[0].w, TEMP[1].xxxx > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 306 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 > %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 352) > %31 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 > %33 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %34 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %33, i64 0, i64 3 > %35 = load <4 x i32>, <4 x i32> addrspace(2)* %34, align 16, !tbaa !0 > %36 = extractelement <8 x i32> %32, i32 7 > %37 = extractelement <4 x i32> %35, i32 0 > %38 = and i32 %37, %36 > %39 = insertelement <4 x i32> %35, i32 %38, i32 0 > %40 = 
getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %41 = bitcast <8 x i32> addrspace(2)* %40 to <2 x i128> addrspace(2)* > %42 = load <2 x i128>, <2 x i128> addrspace(2)* %41, align 32, !tbaa !0 > %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %50 = bitcast float %43 to i32 > %51 = bitcast float %44 to i32 > %52 = insertelement <2 x i32> undef, i32 %50, i32 0 > %53 = insertelement <2 x i32> %52, i32 %51, i32 1 > %54 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %53, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = extractelement <4 x float> %54, i32 3 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = fmul float %58, %49 > %63 = fmul float %59, %45 > %64 = fmul float %60, %45 > %65 = fmul float %61, %45 > %66 = fmul float %62, %63 > %67 = fmul float %62, %64 > %68 = fmul float %62, %65 > %69 = extractelement <2 x i128> %42, i32 1 > %70 = bitcast i128 %69 to <16 x i8> > %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 0) > %72 = extractelement <4 x float> %71, i32 0 > %73 = fmul float %66, %72 > %74 = fmul float %67, %72 > %75 = fmul float %68, %72 > %76 = fmul float %73, %25 > %77 = fmul float %74, %26 > %78 = fadd float %77, %76 > %79 = fmul float %75, %27 > %80 = fadd float %78, %79 > %81 = fmul float %80, %30 > %82 = bitcast float %5 to i32 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %82, 10 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %73, 11 > %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84, float %74, 12 > %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %75, 13 > %87 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86, float %81, 14 > %88 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %88 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, 
i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..34] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 304, 240, 256} >IMM[2] UINT32 {272, 288, 336, 432} >IMM[3] UINT32 {528, 496, 512, 320} >IMM[4] UINT32 {352, 464, 384, 368} >IMM[5] FLT32 { 0.0597, -1.5000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {448, 400, 416, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][19], TEMP[0] > 3: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 4: MOV TEMP[2].w, TEMP[1].xxxx > 5: DP4 TEMP[2].x, CONST[1][16], TEMP[0] > 6: DP4 TEMP[3].x, CONST[1][17], TEMP[0] > 7: MOV TEMP[2].y, TEMP[3].xxxx > 8: DP4 TEMP[3].x, CONST[1][18], TEMP[0] > 9: MOV TEMP[2].z, TEMP[3].xxxx > 10: DP4 TEMP[3].x, CONST[1][21], TEMP[0] > 11: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][27].wwww > 12: MOV TEMP[3], TEMP[2] > 13: MOV TEMP[4].xy, IN[1].xyxx > 14: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][33].xyzz > 15: MUL TEMP[6].xyz, CONST[1][31].xyzz, CONST[1][32].xyzz > 16: MOV TEMP[6].w, CONST[1][31].wwww > 17: ABS TEMP[7].x, TEMP[1].xxxx > 18: MUL TEMP[2].x, TEMP[7].xxxx, IMM[0].yyyy > 19: MIN TEMP[7].x, TEMP[2].xxxx, IMM[0].xxxx > 20: ADD TEMP[2].x, -TEMP[7].xxxx, IMM[0].xxxx > 21: DP3 TEMP[7].x, CONST[1][20].xyzz, TEMP[5].xyzz > 22: DP3 TEMP[8].x, CONST[1][22].xyzz, TEMP[5].xyzz > 23: MOV TEMP[7].z, TEMP[8].xxxx > 24: DP3 TEMP[5].x, CONST[1][21].xyzz, TEMP[5].xyzz > 25: MOV TEMP[7].y, TEMP[5].xxxx > 26: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz > 27: RSQ TEMP[8].x, TEMP[8].xxxx > 28: MUL TEMP[9].xyz, TEMP[8].xxxx, TEMP[7].xyzz > 29: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[8].xxxx, IMM[0].zzzz > 30: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 31: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 32: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 33: DP3 TEMP[8].x, -TEMP[9].xyzz, CONST[1][29].xyzz > 34: FMA TEMP[9].x, -CONST[1][24].yyyy, TEMP[8].xxxx, CONST[1][24].xxxx > 35: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx, IMM[0].xxxx > 36: MOV TEMP[0].z, TEMP[8].xxxx > 37: ABS TEMP[8].x, TEMP[9].xxxx > 38: LG2 TEMP[8].x, TEMP[8].xxxx > 39: MOV TEMP[0].w, TEMP[8].xxxx > 40: MUL TEMP[8].xy, TEMP[0].zwww, IMM[5].xyyy > 41: EX2 TEMP[9].x, TEMP[8].yyyy > 42: FMA TEMP[10].x, CONST[1][24].zzzz, TEMP[9].xxxx, -CONST[1][23].zzzz > 43: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][24].zzzz > 44: MAX TEMP[10].x, TEMP[10].xxxx, IMM[0].wwww > 45: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[2].xxxx, TEMP[9].xxxx > 46: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][28].wwww > 47: FSNE TEMP[10].x, CONST[1][23].xxxx, IMM[0].wwww > 48: UIF TEMP[10].xxxx :0 > 49: RCP TEMP[10].x, CONST[1][23].xxxx > 50: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 51: ELSE :0 > 52: SSG TEMP[11].x, -TEMP[0].xxxx > 53: MUL TEMP[10].x, IMM[5].zzzz, 
TEMP[11].xxxx > 54: ENDIF > 55: MUL TEMP[2].x, TEMP[10].xxxx, IMM[5].wwww > 56: EX2 TEMP[10].x, TEMP[2].xxxx > 57: ADD TEMP[2].x, TEMP[10].xxxx, CONST[1][24].wwww > 58: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][25].yyyy > 59: MUL TEMP[2].x, TEMP[2].xxxx, IMM[7].xxxx > 60: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[2].xxxx > 61: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][23].wwww > 62: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][25].xxxx > 63: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 64: FSNE TEMP[10].x, CONST[1][26].wwww, IMM[0].wwww > 65: UIF TEMP[10].xxxx :0 > 66: RCP TEMP[10].x, CONST[1][26].wwww > 67: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 68: ELSE :0 > 69: SSG TEMP[11].x, -TEMP[0].xxxx > 70: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 71: ENDIF > 72: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][27].zzzz > 73: FSNE TEMP[11].x, CONST[1][23].yyyy, IMM[0].wwww > 74: UIF TEMP[11].xxxx :0 > 75: RCP TEMP[11].x, CONST[1][23].yyyy > 76: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 77: ELSE :0 > 78: SSG TEMP[12].x, TEMP[0].xxxx > 79: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 80: ENDIF > 81: MUL TEMP[2].x, TEMP[10].xxxx, IMM[5].wwww > 82: EX2 TEMP[10].x, TEMP[2].xxxx > 83: MUL TEMP[7].xyz, TEMP[10].xxxx, CONST[1][26].xyzz > 84: FMA TEMP[5].xyz, CONST[1][26].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 85: FMA TEMP[8].xyz, TEMP[7].xyzz, TEMP[8].xxxx, TEMP[9].xxxx > 86: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[0].wwww > 87: SSG TEMP[10].xyz, TEMP[8].xyzz > 88: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 89: RCP TEMP[12].x, TEMP[5].xxxx > 90: RCP TEMP[12].y, TEMP[5].yyyy > 91: RCP TEMP[12].z, TEMP[5].zzzz > 92: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[12].xyzz > 93: UCMP TEMP[8].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[8].xyzz > 94: MUL TEMP[7].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 95: ABS TEMP[1].xyz, TEMP[1].xxxx > 96: MUL TEMP[2].xyz, TEMP[1].xyzz, -TEMP[5].xyzz > 97: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[5].wwww > 98: EX2 TEMP[1].x, TEMP[2].xxxx > 99: EX2 TEMP[1].y, TEMP[2].yyyy >100: EX2 TEMP[1].z, TEMP[2].zzzz >101: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[5].wwww >102: LG2 TEMP[5].x, CONST[1][28].xxxx >103: LG2 TEMP[5].y, CONST[1][28].yyyy >104: LG2 TEMP[5].z, CONST[1][28].zzzz >105: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy >106: EX2 TEMP[9].x, TEMP[5].xxxx >107: EX2 TEMP[9].y, TEMP[5].yyyy >108: EX2 TEMP[9].z, TEMP[5].zzzz >109: EX2 TEMP[5].x, TEMP[7].xxxx >110: EX2 TEMP[5].y, TEMP[7].yyyy >111: EX2 TEMP[5].z, TEMP[7].zzzz >112: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[9].xyzz >113: MUL TEMP[0].xyz, TEMP[8].xyzz, TEMP[7].xyzz >114: ADD TEMP[5].xyz, -TEMP[1].xyzz, IMM[0].xxxx >115: MOV TEMP[1].w, TEMP[1].xxxx >116: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >117: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >118: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >119: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[5].xyzz >120: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >121: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[8].yyyy >122: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww >123: SSG TEMP[7].xyz, TEMP[2].xyzz >124: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >125: RCP TEMP[8].x, TEMP[0].xxxx >126: RCP TEMP[8].y, TEMP[0].yyyy >127: RCP TEMP[8].z, TEMP[0].zzzz >128: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[8].xyzz >129: UCMP TEMP[1].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >130: MOV OUT[4], IN[2] >131: MOV OUT[3], TEMP[1] >132: MOV OUT[2], TEMP[6] >133: MOV OUT[1], TEMP[4] >134: MOV OUT[0], TEMP[3] >135: END >radeonsi: Compiling shader 307 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" 
>target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 248) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 268) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %65 = call float @llvm.SI.load.const(<16 
x i8> %17, i32 464) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 > %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %13) > %81 = extractelement <4 x float> %80, i32 0 > %82 = extractelement <4 x float> %80, i32 1 > %83 = extractelement <4 x float> %80, i32 2 > %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 > %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %14) > %87 = extractelement <4 x float> %86, i32 0 > %88 = extractelement <4 x float> %86, i32 1 > %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 > %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %15) > %92 = extractelement <4 x float> %91, i32 0 > %93 = extractelement <4 x float> %91, i32 1 > %94 = extractelement <4 x float> %91, i32 2 > %95 = extractelement <4 x float> %91, i32 3 > %96 = fmul float %31, %81 > %97 = fmul float %32, %82 > %98 = fadd float %96, %97 > %99 = fmul float %33, %83 > %100 = fadd float %98, %99 > %101 = fadd float %100, %34 > %102 = fmul float %101, %18 > %103 = fmul float %19, %81 > %104 = fmul float %20, %82 > %105 = fadd float %103, %104 > %106 = fmul float %21, %83 > %107 = fadd float %105, %106 > %108 = fadd float %107, %22 > %109 = fmul float %23, %81 > %110 = fmul float %24, %82 > %111 = fadd float %109, %110 > %112 = fmul float %25, %83 > %113 = fadd float %111, %112 > %114 = fadd float %113, %26 > %115 = fmul float %27, %81 > %116 = fmul float %28, %82 > %117 = fadd float %115, %116 > %118 = fmul float %29, %83 > %119 = fadd float %117, %118 > %120 = fadd float %119, %30 > %121 = fmul float %38, %81 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fmul float %40, %83 > %125 = fadd float %123, %124 > %126 = fadd float %125, %41 > %127 = fadd float %126, %60 > %128 = fsub float %75, %81 > %129 = fsub float %76, %82 > %130 = fsub float %77, %83 > %131 = fmul float %68, %72 > %132 = fmul float %69, %73 > %133 = fmul float %70, %74 > %134 = call float @llvm.fabs.f32(float %102) > %135 = fmul float %134, 0x3EF4F8B580000000 > %136 = call float @llvm.minnum.f32(float %135, float 1.000000e+00) > %137 = fsub float 1.000000e+00, %136 > %138 = fmul float %35, %128 > %139 = fmul float %36, %129 > %140 = fadd float %139, %138 > %141 = fmul float %37, %130 > %142 = fadd float %140, %141 > %143 = fmul float %42, %128 > %144 = fmul float %43, %129 > %145 = fadd float %144, %143 > %146 = fmul float %44, %130 > %147 = fadd float %145, %146 > %148 = fmul float 
%38, %128 > %149 = fmul float %39, %129 > %150 = fadd float %149, %148 > %151 = fmul float %40, %130 > %152 = fadd float %150, %151 > %153 = fmul float %142, %142 > %154 = fmul float %152, %152 > %155 = fadd float %154, %153 > %156 = fmul float %147, %147 > %157 = fadd float %155, %156 > %158 = call float @llvm.AMDGPU.rsq.clamped.f32(float %157) > %159 = fmul float %158, %142 > %160 = fmul float %158, %152 > %161 = fmul float %158, %147 > %162 = fsub float -0.000000e+00, %152 > %163 = call float @llvm.fma.f32(float %162, float %158, float 0xBFC3333340000000) > %164 = fsub float 1.000000e+00, %163 > %165 = call float @llvm.AMDGPU.clamp.(float %164, float 0.000000e+00, float 1.000000e+00) > %166 = fmul float %165, %165 > %167 = fmul float %159, %65 > %168 = fsub float -0.000000e+00, %167 > %169 = fmul float %160, %66 > %170 = fsub float %168, %169 > %171 = fmul float %161, %67 > %172 = fsub float %170, %171 > %173 = fsub float -0.000000e+00, %50 > %174 = call float @llvm.fma.f32(float %173, float %172, float %49) > %175 = call float @llvm.fma.f32(float %172, float %172, float 1.000000e+00) > %176 = call float @llvm.fabs.f32(float %174) > %177 = call float @llvm.log2.f32(float %176) > %178 = fmul float %175, 0x3FAE8EC8A0000000 > %179 = fmul float %177, -1.500000e+00 > %180 = call float @llvm.exp2.f32(float %179) > %181 = fsub float -0.000000e+00, %47 > %182 = call float @llvm.fma.f32(float %51, float %180, float %181) > %183 = fmul float %180, %51 > %184 = call float @llvm.maxnum.f32(float %182, float 0.000000e+00) > %185 = fsub float -0.000000e+00, %184 > %186 = call float @llvm.fma.f32(float %185, float %137, float %183) > %187 = call float @llvm.maxnum.f32(float %186, float %64) > %188 = fcmp une float %45, 0.000000e+00 > br i1 %188, label %IF, label %ELSE > >IF: ; preds = %main_body > %189 = fdiv float 1.000000e+00, %45 > %190 = fmul float %127, %189 > %191 = fsub float -0.000000e+00, %190 > br label %ENDIF > >ELSE: ; preds = %main_body > %192 = fsub float -0.000000e+00, %127 > %193 = fcmp olt float %127, -0.000000e+00 > %194 = select i1 %193, float 1.000000e+00, float %192 > %195 = fcmp oge float %194, 0.000000e+00 > %.op = fmul float %194, 0x4600000000000000 > %196 = select i1 %195, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %191, %IF ], [ %196, %ELSE ] > %197 = fmul float %temp40.0, 0x3FF7154760000000 > %198 = call float @llvm.exp2.f32(float %197) > %199 = fadd float %198, %52 > %200 = fmul float %199, %54 > %201 = fmul float %200, 5.000000e-01 > %202 = fmul float %166, %201 > %203 = call float @llvm.minnum.f32(float %202, float %48) > %204 = call float @llvm.maxnum.f32(float %203, float %53) > %205 = fmul float %204, %187 > %206 = fcmp une float %58, 0.000000e+00 > br i1 %206, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %207 = fdiv float 1.000000e+00, %58 > %208 = fmul float %127, %207 > %209 = fsub float -0.000000e+00, %208 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %210 = fsub float -0.000000e+00, %127 > %211 = fcmp olt float %127, -0.000000e+00 > %212 = select i1 %211, float 1.000000e+00, float %210 > %213 = fcmp oge float %212, 0.000000e+00 > %.op58 = fmul float %212, 0x4600000000000000 > %214 = select i1 %213, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %209, %IF53 ], [ %214, %ELSE54 ] > %215 = fsub float %59, %127 > %216 = fcmp une float %46, 0.000000e+00 > br i1 %216, label %IF56, label %ELSE57 > >IF56: ; preds = 
%ENDIF52 > %217 = fdiv float 1.000000e+00, %46 > %218 = fmul float %215, %217 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %219 = fcmp ogt float %215, 0.000000e+00 > %220 = select i1 %219, float 1.000000e+00, float %215 > %221 = fcmp oge float %220, 0.000000e+00 > %.op59 = fmul float %220, 0x4600000000000000 > %222 = select i1 %221, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %218, %IF56 ], [ %222, %ELSE57 ] > %223 = fmul float %temp40.1, 0x3FF7154760000000 > %224 = call float @llvm.exp2.f32(float %223) > %225 = fmul float %224, %55 > %226 = fmul float %224, %56 > %227 = fmul float %224, %57 > %228 = call float @llvm.fma.f32(float %55, float %224, float %204) > %229 = call float @llvm.fma.f32(float %56, float %224, float %204) > %230 = call float @llvm.fma.f32(float %57, float %224, float %204) > %231 = call float @llvm.fma.f32(float %225, float %178, float %205) > %232 = call float @llvm.fma.f32(float %226, float %178, float %205) > %233 = call float @llvm.fma.f32(float %227, float %178, float %205) > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp oeq float %229, 0.000000e+00 > %236 = fcmp oeq float %230, 0.000000e+00 > %237 = fcmp ogt float %231, 0.000000e+00 > %238 = select i1 %237, float 1.000000e+00, float %231 > %239 = fcmp oge float %238, 0.000000e+00 > %240 = fcmp ogt float %232, 0.000000e+00 > %241 = select i1 %240, float 1.000000e+00, float %232 > %242 = fcmp oge float %241, 0.000000e+00 > %243 = fcmp ogt float %233, 0.000000e+00 > %244 = select i1 %243, float 1.000000e+00, float %233 > %245 = fcmp oge float %244, 0.000000e+00 > %.op60 = fmul float %238, 0x4600000000000000 > %246 = select i1 %239, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %241, 0x4600000000000000 > %247 = select i1 %242, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %244, 0x4600000000000000 > %248 = select i1 %245, float %.op62, float 0xC600000000000000 > %249 = fdiv float 1.000000e+00, %228 > %250 = fdiv float 1.000000e+00, %229 > %251 = fdiv float 1.000000e+00, %230 > %252 = fmul float %231, %249 > %253 = fmul float %232, %250 > %254 = fmul float %233, %251 > %255 = select i1 %234, float %246, float %252 > %256 = select i1 %235, float %247, float %253 > %257 = select i1 %236, float %248, float %254 > %258 = fmul float %228, %temp44.0 > %259 = fmul float %229, %temp44.0 > %260 = fmul float %230, %temp44.0 > %261 = call float @llvm.fabs.f32(float %102) > %262 = call float @llvm.fabs.f32(float %102) > %263 = call float @llvm.fabs.f32(float %102) > %264 = fmul float %228, %261 > %265 = fmul float %229, %262 > %266 = fmul float %230, %263 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = fmul float %265, 0xBFF7154760000000 > %269 = fmul float %266, 0xBFF7154760000000 > %270 = call float @llvm.exp2.f32(float %267) > %271 = call float @llvm.exp2.f32(float %268) > %272 = call float @llvm.exp2.f32(float %269) > %273 = fmul float %258, 0xBFF7154760000000 > %274 = fmul float %259, 0xBFF7154760000000 > %275 = fmul float %260, 0xBFF7154760000000 > %276 = call float @llvm.log2.f32(float %61) > %277 = call float @llvm.log2.f32(float %62) > %278 = call float @llvm.log2.f32(float %63) > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = fmul float %277, 0x3FDD1745E0000000 > %281 = fmul float %278, 0x3FDD1745E0000000 > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %280) > %284 = call float @llvm.exp2.f32(float %281) > %285 = call float @llvm.exp2.f32(float 
%273) > %286 = call float @llvm.exp2.f32(float %274) > %287 = call float @llvm.exp2.f32(float %275) > %288 = fmul float %285, %282 > %289 = fmul float %286, %283 > %290 = fmul float %287, %284 > %291 = fmul float %255, %288 > %292 = fmul float %256, %289 > %293 = fmul float %257, %290 > %294 = fsub float 1.000000e+00, %270 > %295 = fsub float 1.000000e+00, %271 > %296 = fsub float 1.000000e+00, %272 > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.fma.f32(float %292, float %295, float 0xBF70624DE0000000) > %299 = call float @llvm.fma.f32(float %293, float %296, float 0xBF70624DE0000000) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.maxnum.f32(float %298, float 0.000000e+00) > %302 = call float @llvm.maxnum.f32(float %299, float 0.000000e+00) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = call float @llvm.fma.f32(float %301, float 0x4018CCCCC0000000, float 5.000000e-01) > %305 = call float @llvm.fma.f32(float %302, float 0x4018CCCCC0000000, float 5.000000e-01) > %306 = fmul float %300, %303 > %307 = fmul float %301, %304 > %308 = fmul float %302, %305 > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %301, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %311 = call float @llvm.fma.f32(float %302, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = call float @llvm.fma.f32(float %301, float %310, float 0x3FAEB851E0000000) > %314 = call float @llvm.fma.f32(float %302, float %311, float 0x3FAEB851E0000000) > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp oeq float %313, 0.000000e+00 > %317 = fcmp oeq float %314, 0.000000e+00 > %318 = fcmp ogt float %306, 0.000000e+00 > %319 = select i1 %318, float 1.000000e+00, float %306 > %320 = fcmp oge float %319, 0.000000e+00 > %321 = fcmp ogt float %307, 0.000000e+00 > %322 = select i1 %321, float 1.000000e+00, float %307 > %323 = fcmp oge float %322, 0.000000e+00 > %324 = fcmp ogt float %308, 0.000000e+00 > %325 = select i1 %324, float 1.000000e+00, float %308 > %326 = fcmp oge float %325, 0.000000e+00 > %.op63 = fmul float %319, 0x4600000000000000 > %327 = select i1 %320, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %322, 0x4600000000000000 > %328 = select i1 %323, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %325, 0x4600000000000000 > %329 = select i1 %326, float %.op65, float 0xC600000000000000 > %330 = fdiv float 1.000000e+00, %312 > %331 = fdiv float 1.000000e+00, %313 > %332 = fdiv float 1.000000e+00, %314 > %333 = fmul float %306, %330 > %334 = fmul float %307, %331 > %335 = fmul float %308, %332 > %336 = select i1 %315, float %327, float %333 > %337 = select i1 %316, float %328, float %334 > %338 = select i1 %317, float %329, float %335 > %339 = bitcast i32 %11 to float > %340 = insertvalue <{ float, float, float }> undef, float %339, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %87, float %88, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %133, float %71) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %336, float %337, float %338, float %270) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %92, float %93, float 
%94, float %95) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %108, float %114, float %120, float %102) > ret <{ float, float, float }> %340 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..35] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.1500} >IMM[1] UINT32 {0, 320, 240, 272} >IMM[2] UINT32 {288, 304, 352, 448} >IMM[3] UINT32 {560, 544, 512, 528} >IMM[4] UINT32 {336, 368, 480, 400} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {384, 464, 416, 432} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 4: MOV TEMP[2].w, TEMP[1].xxxx > 5: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 6: DP4 TEMP[3].x, CONST[1][18], TEMP[0] > 7: MOV TEMP[2].y, TEMP[3].xxxx > 8: DP4 TEMP[3].x, CONST[1][19], TEMP[0] > 9: MOV TEMP[2].z, TEMP[3].xxxx > 10: DP4 TEMP[3].x, CONST[1][22], TEMP[0] > 11: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][28].wwww > 12: MOV TEMP[3], TEMP[2] > 13: MOV TEMP[4].xy, IN[1].xyxx > 14: MUL TEMP[5].xy, CONST[1][35].xyyy, IMM[0].xyyy > 15: MUL TEMP[6].xy, TEMP[1].xxxx, CONST[1][35].xyyy > 16: FMA TEMP[5].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 17: MOV TEMP[5].zw, TEMP[2].wwzw > 18: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][34].xyzz > 19: MUL TEMP[8].xyz, CONST[1][32].xyzz, CONST[1][33].xyzz > 20: MOV TEMP[8].w, CONST[1][32].wwww > 21: ABS TEMP[9].x, TEMP[1].xxxx > 22: MUL TEMP[2].x, TEMP[9].xxxx, IMM[0].zzzz > 23: MIN TEMP[9].x, TEMP[2].xxxx, IMM[0].xxxx > 24: ADD TEMP[2].x, -TEMP[9].xxxx, IMM[0].xxxx > 25: DP3 TEMP[6].x, CONST[1][21].xyzz, TEMP[7].xyzz > 26: DP3 TEMP[9].x, CONST[1][23].xyzz, TEMP[7].xyzz > 27: MOV TEMP[6].z, TEMP[9].xxxx > 28: DP3 TEMP[7].x, CONST[1][22].xyzz, TEMP[7].xyzz > 29: MOV TEMP[6].y, TEMP[7].xxxx > 30: DP3 TEMP[9].x, TEMP[6].xyzz, TEMP[6].xyzz > 31: RSQ TEMP[9].x, TEMP[9].xxxx > 32: MUL TEMP[10].xyz, TEMP[9].xxxx, TEMP[6].xyzz > 33: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].wwww > 34: ADD TEMP[7].x, -TEMP[7].xxxx, 
IMM[0].xxxx > 35: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 36: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 37: DP3 TEMP[9].x, -TEMP[10].xyzz, CONST[1][30].xyzz > 38: FMA TEMP[10].x, -CONST[1][25].yyyy, TEMP[9].xxxx, CONST[1][25].xxxx > 39: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 40: MOV TEMP[0].z, TEMP[9].xxxx > 41: ABS TEMP[9].x, TEMP[10].xxxx > 42: LG2 TEMP[9].x, TEMP[9].xxxx > 43: MOV TEMP[0].w, TEMP[9].xxxx > 44: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 45: EX2 TEMP[10].x, TEMP[9].yyyy > 46: FMA TEMP[11].x, CONST[1][25].zzzz, TEMP[10].xxxx, -CONST[1][24].zzzz > 47: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][25].zzzz > 48: MAX TEMP[11].x, TEMP[11].xxxx, IMM[5].zzzz > 49: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[2].xxxx, TEMP[10].xxxx > 50: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][29].wwww > 51: FSNE TEMP[11].x, CONST[1][24].xxxx, IMM[5].zzzz > 52: UIF TEMP[11].xxxx :0 > 53: RCP TEMP[11].x, CONST[1][24].xxxx > 54: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 55: ELSE :0 > 56: SSG TEMP[12].x, -TEMP[0].xxxx > 57: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 58: ENDIF > 59: MUL TEMP[2].x, TEMP[11].xxxx, IMM[7].xxxx > 60: EX2 TEMP[11].x, TEMP[2].xxxx > 61: ADD TEMP[2].x, TEMP[11].xxxx, CONST[1][25].wwww > 62: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][26].yyyy > 63: MUL TEMP[2].x, TEMP[2].xxxx, IMM[7].yyyy > 64: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[2].xxxx > 65: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][24].wwww > 66: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][26].xxxx > 67: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 68: FSNE TEMP[11].x, CONST[1][27].wwww, IMM[5].zzzz > 69: UIF TEMP[11].xxxx :0 > 70: RCP TEMP[11].x, CONST[1][27].wwww > 71: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 72: ELSE :0 > 73: SSG TEMP[12].x, -TEMP[0].xxxx > 74: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 75: ENDIF > 76: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][28].zzzz > 77: FSNE TEMP[12].x, CONST[1][24].yyyy, IMM[5].zzzz > 78: UIF TEMP[12].xxxx :0 > 79: RCP TEMP[12].x, CONST[1][24].yyyy > 80: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 81: ELSE :0 > 82: SSG TEMP[13].x, TEMP[0].xxxx > 83: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 84: ENDIF > 85: MUL TEMP[2].x, TEMP[11].xxxx, IMM[7].xxxx > 86: EX2 TEMP[11].x, TEMP[2].xxxx > 87: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][27].xyzz > 88: FMA TEMP[7].xyz, CONST[1][27].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 89: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 90: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[5].zzzz > 91: SSG TEMP[11].xyz, TEMP[9].xyzz > 92: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 93: RCP TEMP[13].x, TEMP[7].xxxx > 94: RCP TEMP[13].y, TEMP[7].yyyy > 95: RCP TEMP[13].z, TEMP[7].zzzz > 96: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 97: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 98: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 99: ABS TEMP[1].xyz, TEMP[1].xxxx >100: MUL TEMP[2].xyz, TEMP[1].xyzz, -TEMP[7].xyzz >101: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[7].xxxx >102: EX2 TEMP[1].x, TEMP[2].xxxx >103: EX2 TEMP[1].y, TEMP[2].yyyy >104: EX2 TEMP[1].z, TEMP[2].zzzz >105: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >106: LG2 TEMP[7].x, CONST[1][29].xxxx >107: LG2 TEMP[7].y, CONST[1][29].yyyy >108: LG2 TEMP[7].z, CONST[1][29].zzzz >109: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >110: EX2 TEMP[10].x, TEMP[7].xxxx >111: EX2 TEMP[10].y, TEMP[7].yyyy >112: EX2 TEMP[10].z, TEMP[7].zzzz >113: EX2 TEMP[7].x, TEMP[6].xxxx >114: EX2 TEMP[7].y, TEMP[6].yyyy >115: EX2 TEMP[7].z, TEMP[6].zzzz >116: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >117: 
MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >118: ADD TEMP[7].xyz, -TEMP[1].xyzz, IMM[0].xxxx >119: MOV TEMP[1].w, TEMP[1].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >121: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].zzzz >122: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >123: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[7].xyzz >124: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >125: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >126: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[5].zzzz >127: SSG TEMP[7].xyz, TEMP[2].xyzz >128: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >129: RCP TEMP[9].x, TEMP[0].xxxx >130: RCP TEMP[9].y, TEMP[0].yyyy >131: RCP TEMP[9].z, TEMP[0].zzzz >132: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[9].xyzz >133: UCMP TEMP[1].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >134: MOV OUT[5], IN[2] >135: MOV OUT[4], TEMP[1] >136: MOV OUT[3], TEMP[8] >137: MOV OUT[2], TEMP[5] >138: MOV OUT[1], TEMP[4] >139: MOV OUT[0], TEMP[3] >140: END >radeonsi: Compiling shader 308 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 248) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %48 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 396) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %13) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 > %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %14) > %89 = extractelement <4 x float> %88, i32 0 > %90 = extractelement <4 x float> %88, i32 1 > %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 > %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %15) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = extractelement <4 x float> %93, i32 3 > %98 = fmul float %31, %83 > %99 = fmul float %32, %84 > %100 = fadd float %98, %99 > %101 = fmul float %33, %85 > %102 = fadd float %100, %101 > %103 = fadd float %102, %34 > %104 = fmul float %103, %18 > %105 = fmul float %19, %83 > %106 = fmul float %20, %84 > %107 = fadd float %105, %106 > %108 = fmul float %21, %85 > %109 = fadd float %107, %108 > %110 = fadd float %109, %22 > %111 = fmul float %23, %83 > %112 = fmul 
float %24, %84 > %113 = fadd float %111, %112 > %114 = fmul float %25, %85 > %115 = fadd float %113, %114 > %116 = fadd float %115, %26 > %117 = fmul float %27, %83 > %118 = fmul float %28, %84 > %119 = fadd float %117, %118 > %120 = fmul float %29, %85 > %121 = fadd float %119, %120 > %122 = fadd float %121, %30 > %123 = fmul float %38, %83 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fmul float %40, %85 > %127 = fadd float %125, %126 > %128 = fadd float %127, %41 > %129 = fadd float %128, %60 > %130 = fsub float -0.000000e+00, %79 > %131 = fmul float %104, %78 > %132 = fmul float %104, %79 > %133 = call float @llvm.fma.f32(float %110, float %78, float %131) > %134 = call float @llvm.fma.f32(float %116, float %130, float %132) > %135 = fsub float %75, %83 > %136 = fsub float %76, %84 > %137 = fsub float %77, %85 > %138 = fmul float %68, %72 > %139 = fmul float %69, %73 > %140 = fmul float %70, %74 > %141 = call float @llvm.fabs.f32(float %104) > %142 = fmul float %141, 0x3EF4F8B580000000 > %143 = call float @llvm.minnum.f32(float %142, float 1.000000e+00) > %144 = fsub float 1.000000e+00, %143 > %145 = fmul float %35, %135 > %146 = fmul float %36, %136 > %147 = fadd float %146, %145 > %148 = fmul float %37, %137 > %149 = fadd float %147, %148 > %150 = fmul float %42, %135 > %151 = fmul float %43, %136 > %152 = fadd float %151, %150 > %153 = fmul float %44, %137 > %154 = fadd float %152, %153 > %155 = fmul float %38, %135 > %156 = fmul float %39, %136 > %157 = fadd float %156, %155 > %158 = fmul float %40, %137 > %159 = fadd float %157, %158 > %160 = fmul float %149, %149 > %161 = fmul float %159, %159 > %162 = fadd float %161, %160 > %163 = fmul float %154, %154 > %164 = fadd float %162, %163 > %165 = call float @llvm.AMDGPU.rsq.clamped.f32(float %164) > %166 = fmul float %165, %149 > %167 = fmul float %165, %159 > %168 = fmul float %165, %154 > %169 = fsub float -0.000000e+00, %159 > %170 = call float @llvm.fma.f32(float %169, float %165, float 0xBFC3333340000000) > %171 = fsub float 1.000000e+00, %170 > %172 = call float @llvm.AMDGPU.clamp.(float %171, float 0.000000e+00, float 1.000000e+00) > %173 = fmul float %172, %172 > %174 = fmul float %166, %65 > %175 = fsub float -0.000000e+00, %174 > %176 = fmul float %167, %66 > %177 = fsub float %175, %176 > %178 = fmul float %168, %67 > %179 = fsub float %177, %178 > %180 = fsub float -0.000000e+00, %50 > %181 = call float @llvm.fma.f32(float %180, float %179, float %49) > %182 = call float @llvm.fma.f32(float %179, float %179, float 1.000000e+00) > %183 = call float @llvm.fabs.f32(float %181) > %184 = call float @llvm.log2.f32(float %183) > %185 = fmul float %182, 0x3FAE8EC8A0000000 > %186 = fmul float %184, -1.500000e+00 > %187 = call float @llvm.exp2.f32(float %186) > %188 = fsub float -0.000000e+00, %47 > %189 = call float @llvm.fma.f32(float %51, float %187, float %188) > %190 = fmul float %187, %51 > %191 = call float @llvm.maxnum.f32(float %189, float 0.000000e+00) > %192 = fsub float -0.000000e+00, %191 > %193 = call float @llvm.fma.f32(float %192, float %144, float %190) > %194 = call float @llvm.maxnum.f32(float %193, float %64) > %195 = fcmp une float %45, 0.000000e+00 > br i1 %195, label %IF, label %ELSE > >IF: ; preds = %main_body > %196 = fdiv float 1.000000e+00, %45 > %197 = fmul float %129, %196 > %198 = fsub float -0.000000e+00, %197 > br label %ENDIF > >ELSE: ; preds = %main_body > %199 = fsub float -0.000000e+00, %129 > %200 = fcmp olt float %129, -0.000000e+00 > %201 = select i1 %200, float 
1.000000e+00, float %199 > %202 = fcmp oge float %201, 0.000000e+00 > %.op = fmul float %201, 0x4600000000000000 > %203 = select i1 %202, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %198, %IF ], [ %203, %ELSE ] > %204 = fmul float %temp44.0, 0x3FF7154760000000 > %205 = call float @llvm.exp2.f32(float %204) > %206 = fadd float %205, %52 > %207 = fmul float %206, %54 > %208 = fmul float %207, 5.000000e-01 > %209 = fmul float %173, %208 > %210 = call float @llvm.minnum.f32(float %209, float %48) > %211 = call float @llvm.maxnum.f32(float %210, float %53) > %212 = fmul float %211, %194 > %213 = fcmp une float %58, 0.000000e+00 > br i1 %213, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %214 = fdiv float 1.000000e+00, %58 > %215 = fmul float %129, %214 > %216 = fsub float -0.000000e+00, %215 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %217 = fsub float -0.000000e+00, %129 > %218 = fcmp olt float %129, -0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %217 > %220 = fcmp oge float %219, 0.000000e+00 > %.op62 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %216, %IF57 ], [ %221, %ELSE58 ] > %222 = fsub float %59, %129 > %223 = fcmp une float %46, 0.000000e+00 > br i1 %223, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %224 = fdiv float 1.000000e+00, %46 > %225 = fmul float %222, %224 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %226 = fcmp ogt float %222, 0.000000e+00 > %227 = select i1 %226, float 1.000000e+00, float %222 > %228 = fcmp oge float %227, 0.000000e+00 > %.op63 = fmul float %227, 0x4600000000000000 > %229 = select i1 %228, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %225, %IF60 ], [ %229, %ELSE61 ] > %230 = fmul float %temp44.1, 0x3FF7154760000000 > %231 = call float @llvm.exp2.f32(float %230) > %232 = fmul float %231, %55 > %233 = fmul float %231, %56 > %234 = fmul float %231, %57 > %235 = call float @llvm.fma.f32(float %55, float %231, float %211) > %236 = call float @llvm.fma.f32(float %56, float %231, float %211) > %237 = call float @llvm.fma.f32(float %57, float %231, float %211) > %238 = call float @llvm.fma.f32(float %232, float %185, float %212) > %239 = call float @llvm.fma.f32(float %233, float %185, float %212) > %240 = call float @llvm.fma.f32(float %234, float %185, float %212) > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp oeq float %236, 0.000000e+00 > %243 = fcmp oeq float %237, 0.000000e+00 > %244 = fcmp ogt float %238, 0.000000e+00 > %245 = select i1 %244, float 1.000000e+00, float %238 > %246 = fcmp oge float %245, 0.000000e+00 > %247 = fcmp ogt float %239, 0.000000e+00 > %248 = select i1 %247, float 1.000000e+00, float %239 > %249 = fcmp oge float %248, 0.000000e+00 > %250 = fcmp ogt float %240, 0.000000e+00 > %251 = select i1 %250, float 1.000000e+00, float %240 > %252 = fcmp oge float %251, 0.000000e+00 > %.op64 = fmul float %245, 0x4600000000000000 > %253 = select i1 %246, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %248, 0x4600000000000000 > %254 = select i1 %249, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %251, 0x4600000000000000 > %255 = select i1 %252, float %.op66, float 0xC600000000000000 > %256 = fdiv float 1.000000e+00, %235 > %257 = fdiv float 1.000000e+00, %236 > %258 = fdiv float 
1.000000e+00, %237 > %259 = fmul float %238, %256 > %260 = fmul float %239, %257 > %261 = fmul float %240, %258 > %262 = select i1 %241, float %253, float %259 > %263 = select i1 %242, float %254, float %260 > %264 = select i1 %243, float %255, float %261 > %265 = fmul float %235, %temp48.0 > %266 = fmul float %236, %temp48.0 > %267 = fmul float %237, %temp48.0 > %268 = call float @llvm.fabs.f32(float %104) > %269 = call float @llvm.fabs.f32(float %104) > %270 = call float @llvm.fabs.f32(float %104) > %271 = fmul float %235, %268 > %272 = fmul float %236, %269 > %273 = fmul float %237, %270 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = fmul float %272, 0xBFF7154760000000 > %276 = fmul float %273, 0xBFF7154760000000 > %277 = call float @llvm.exp2.f32(float %274) > %278 = call float @llvm.exp2.f32(float %275) > %279 = call float @llvm.exp2.f32(float %276) > %280 = fmul float %265, 0xBFF7154760000000 > %281 = fmul float %266, 0xBFF7154760000000 > %282 = fmul float %267, 0xBFF7154760000000 > %283 = call float @llvm.log2.f32(float %61) > %284 = call float @llvm.log2.f32(float %62) > %285 = call float @llvm.log2.f32(float %63) > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = fmul float %284, 0x3FDD1745E0000000 > %288 = fmul float %285, 0x3FDD1745E0000000 > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %287) > %291 = call float @llvm.exp2.f32(float %288) > %292 = call float @llvm.exp2.f32(float %280) > %293 = call float @llvm.exp2.f32(float %281) > %294 = call float @llvm.exp2.f32(float %282) > %295 = fmul float %292, %289 > %296 = fmul float %293, %290 > %297 = fmul float %294, %291 > %298 = fmul float %262, %295 > %299 = fmul float %263, %296 > %300 = fmul float %264, %297 > %301 = fsub float 1.000000e+00, %277 > %302 = fsub float 1.000000e+00, %278 > %303 = fsub float 1.000000e+00, %279 > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.fma.f32(float %299, float %302, float 0xBF70624DE0000000) > %306 = call float @llvm.fma.f32(float %300, float %303, float 0xBF70624DE0000000) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.maxnum.f32(float %305, float 0.000000e+00) > %309 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = call float @llvm.fma.f32(float %308, float 0x4018CCCCC0000000, float 5.000000e-01) > %312 = call float @llvm.fma.f32(float %309, float 0x4018CCCCC0000000, float 5.000000e-01) > %313 = fmul float %307, %310 > %314 = fmul float %308, %311 > %315 = fmul float %309, %312 > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %308, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %318 = call float @llvm.fma.f32(float %309, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = call float @llvm.fma.f32(float %308, float %317, float 0x3FAEB851E0000000) > %321 = call float @llvm.fma.f32(float %309, float %318, float 0x3FAEB851E0000000) > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp oeq float %320, 0.000000e+00 > %324 = fcmp oeq float %321, 0.000000e+00 > %325 = fcmp ogt float %313, 0.000000e+00 > %326 = select i1 %325, float 1.000000e+00, float %313 > %327 = fcmp oge float %326, 0.000000e+00 > %328 = fcmp ogt float %314, 0.000000e+00 
> %329 = select i1 %328, float 1.000000e+00, float %314 > %330 = fcmp oge float %329, 0.000000e+00 > %331 = fcmp ogt float %315, 0.000000e+00 > %332 = select i1 %331, float 1.000000e+00, float %315 > %333 = fcmp oge float %332, 0.000000e+00 > %.op67 = fmul float %326, 0x4600000000000000 > %334 = select i1 %327, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %329, 0x4600000000000000 > %335 = select i1 %330, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %332, 0x4600000000000000 > %336 = select i1 %333, float %.op69, float 0xC600000000000000 > %337 = fdiv float 1.000000e+00, %319 > %338 = fdiv float 1.000000e+00, %320 > %339 = fdiv float 1.000000e+00, %321 > %340 = fmul float %313, %337 > %341 = fmul float %314, %338 > %342 = fmul float %315, %339 > %343 = select i1 %322, float %334, float %340 > %344 = select i1 %323, float %335, float %341 > %345 = select i1 %324, float %336, float %342 > %346 = bitcast i32 %11 to float > %347 = insertvalue <{ float, float, float }> undef, float %346, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %133, float %134, float %122, float %104) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %138, float %139, float %140, float %71) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %343, float %344, float %345, float %277) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %94, float %95, float %96, float %97) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %116, float %122, float %104) > ret <{ float, float, float }> %347 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 256, 496} > 0: 
FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].wwww, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].wwww > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][16].xxxx > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xy, IN[0].xyyy > 26: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 27: MOV TEMP[2].xyz, TEMP[1].xyzx > 28: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 29: MOV TEMP[2].w, TEMP[1].xxxx > 30: MUL TEMP[0], TEMP[2], IN[4] > 31: MUL TEMP[0], TEMP[0], IN[2] > 32: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 33: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 34: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][31].xyzz > 35: MOV TEMP[0].xyz, TEMP[0].xyzx > 36: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][16].yyyy > 37: MOV TEMP[0].w, TEMP[1].xxxx > 38: MOV OUT[0], TEMP[0] > 39: END >radeonsi: Compiling shader 309 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %31 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 > %33 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %34 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %33, i64 0, i64 3 > %35 = load <4 x i32>, <4 x i32> addrspace(2)* %34, align 16, !tbaa !0 > %36 = extractelement <8 x i32> %32, i32 7 > %37 = extractelement <4 x i32> %35, i32 0 > %38 = and i32 %37, %36 > %39 = insertelement <4 x i32> %35, i32 %38, i32 0 > %40 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 > %42 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x 
i32>] addrspace(2)* > %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %42, i64 0, i64 7 > %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 > %45 = extractelement <8 x i32> %41, i32 7 > %46 = extractelement <4 x i32> %44, i32 0 > %47 = and i32 %46, %45 > %48 = insertelement <4 x i32> %44, i32 %47, i32 0 > %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %63 = fcmp oeq float %53, 0.000000e+00 > %64 = fcmp oeq float %53, 0.000000e+00 > %65 = fcmp ogt float %51, 0.000000e+00 > %66 = select i1 %65, float 1.000000e+00, float %51 > %67 = fcmp oge float %66, 0.000000e+00 > %68 = fcmp ogt float %52, 0.000000e+00 > %69 = select i1 %68, float 1.000000e+00, float %52 > %70 = fcmp oge float %69, 0.000000e+00 > %.op = fmul float %66, 0x4600000000000000 > %71 = select i1 %67, float %.op, float 0xC600000000000000 > %.op12 = fmul float %69, 0x4600000000000000 > %72 = select i1 %70, float %.op12, float 0xC600000000000000 > %73 = fdiv float 1.000000e+00, %53 > %74 = fmul float %51, %73 > %75 = fmul float %52, %73 > %76 = select i1 %63, float %71, float %74 > %77 = select i1 %64, float %72, float %75 > %78 = bitcast float %76 to i32 > %79 = bitcast float %77 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 0 > %84 = fsub float %53, %83 > %85 = fcmp une float %25, 0.000000e+00 > %86 = call float @llvm.fabs.f32(float %84) > br i1 %85, label %IF, label %ELSE > >IF: ; preds = %main_body > %87 = fdiv float 1.000000e+00, %25 > %88 = fmul float %86, %87 > br label %ENDIF > >ELSE: ; preds = %main_body > %89 = fcmp one float %84, 0.000000e+00 > %90 = select i1 %89, float 1.000000e+00, float %86 > %91 = fcmp oge float %90, 0.000000e+00 > %.op13 = fmul float %90, 0x4600000000000000 > %92 = select i1 %91, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %88, %IF ], [ %92, %ELSE ] > %93 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %94 = fsub float 1.000000e+00, %93 > %95 = call float @llvm.log2.f32(float %94) > %96 = fmul float %95, %26 > %97 = call float @llvm.exp2.f32(float %96) > %98 = fsub float 1.000000e+00, %97 > %99 = bitcast float %49 to i32 > %100 = bitcast float %50 to i32 > %101 = insertelement <2 x i32> undef, i32 %99, i32 0 > %102 = 
insertelement <2 x i32> %101, i32 %100, i32 1 > %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %41, <4 x i32> %48, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = extractelement <4 x float> %103, i32 3 > %108 = fmul float %98, %107 > %109 = fmul float %104, %59 > %110 = fmul float %105, %60 > %111 = fmul float %106, %61 > %112 = fmul float %108, %62 > %113 = fmul float %109, %54 > %114 = fmul float %110, %55 > %115 = fmul float %111, %56 > %116 = fmul float %112, %57 > %117 = fmul float %113, %58 > %118 = fmul float %114, %58 > %119 = fmul float %115, %58 > %120 = fmul float %116, %117 > %121 = fmul float %116, %118 > %122 = fmul float %116, %119 > %123 = fmul float %120, %28 > %124 = fmul float %121, %29 > %125 = fadd float %124, %123 > %126 = fmul float %122, %30 > %127 = fadd float %125, %126 > %128 = fmul float %127, %27 > %129 = bitcast float %5 to i32 > %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %129, 10 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %120, 11 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %121, 12 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %122, 13 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %128, 14 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > 
epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..34] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 272, 304, 256} >IMM[1] UINT32 {288, 240, 544, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].x, CONST[1][17].xxxx, CONST[1][19].xxxx > 1: MOV TEMP[0].y, TEMP[0].xxxx > 2: MUL TEMP[1].xy, CONST[1][16].wxxx, CONST[1][19].xxxx > 3: MOV TEMP[0].xw, TEMP[1].xxxy > 4: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][18].zwww, TEMP[0].xyyy > 5: MOV TEMP[1].xy, TEMP[0].xyyy > 6: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 7: FMA TEMP[0].xy, TEMP[1].ywww, IMM[2].xxxx, IMM[2].yyyy > 8: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[1][17].yyyy, IN[0].xyyy > 9: MUL TEMP[1].x, CONST[1][15].wwww, CONST[1][19].xxxx > 10: MOV TEMP[0].z, TEMP[1].xxxx > 11: FMA TEMP[1].xy, TEMP[0].xyyy, CONST[1][17].zwww, TEMP[0].zwww > 12: MOV TEMP[1].xy, TEMP[1].xyyy > 13: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 14: MUL TEMP[2].xy, CONST[1][16].yzzz, CONST[1][19].xxxx > 15: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[1][18].xyyy, TEMP[2].xyyy > 16: MOV TEMP[2].xy, TEMP[0].xyyy > 17: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 18: MUL TEMP[3].x, TEMP[2].wwww, TEMP[1].wwww > 19: ADD TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz > 20: MOV TEMP[2].xy, IN[0].xyyy > 21: TEX TEMP[2].w, TEMP[2], SAMP[3], 2D > 22: MUL TEMP[2].x, TEMP[2].wwww, TEMP[3].xxxx > 23: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 24: MOV TEMP[1].w, TEMP[2].xxxx > 25: MUL TEMP[0], TEMP[1], IN[3] > 26: MUL TEMP[0], TEMP[0], IN[1] > 27: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 28: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 29: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][34].xyzz > 30: MOV TEMP[0].xyz, TEMP[0].xyzx > 31: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 32: MOV TEMP[0].w, TEMP[1].xxxx > 33: MOV OUT[0], TEMP[0] > 34: END >radeonsi: Compiling shader 310 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 544) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 548) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 552) > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 7 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 15 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, 
<2 x i32> %8) > %90 = fmul float %31, %39 > %91 = fmul float %30, %39 > %92 = fmul float %27, %39 > %93 = call float @llvm.fma.f32(float %79, float %37, float %91) > %94 = call float @llvm.fma.f32(float %80, float %38, float %90) > %95 = bitcast float %93 to i32 > %96 = bitcast float %94 to i32 > %97 = insertelement <2 x i32> undef, i32 %95, i32 0 > %98 = insertelement <2 x i32> %97, i32 %96, i32 1 > %99 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %98, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %100 = extractelement <4 x float> %99, i32 1 > %101 = extractelement <4 x float> %99, i32 3 > %102 = call float @llvm.fma.f32(float %100, float 2.000000e+00, float -1.000000e+00) > %103 = call float @llvm.fma.f32(float %101, float 2.000000e+00, float -1.000000e+00) > %104 = call float @llvm.fma.f32(float %102, float %32, float %79) > %105 = call float @llvm.fma.f32(float %103, float %32, float %80) > %106 = fmul float %26, %39 > %107 = call float @llvm.fma.f32(float %104, float %33, float %106) > %108 = call float @llvm.fma.f32(float %105, float %34, float %92) > %109 = bitcast float %107 to i32 > %110 = bitcast float %108 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = extractelement <4 x float> %113, i32 1 > %116 = extractelement <4 x float> %113, i32 2 > %117 = extractelement <4 x float> %113, i32 3 > %118 = fmul float %28, %39 > %119 = fmul float %29, %39 > %120 = call float @llvm.fma.f32(float %104, float %35, float %118) > %121 = call float @llvm.fma.f32(float %105, float %36, float %119) > %122 = bitcast float %120 to i32 > %123 = bitcast float %121 to i32 > %124 = insertelement <2 x i32> undef, i32 %122, i32 0 > %125 = insertelement <2 x i32> %124, i32 %123, i32 1 > %126 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %125, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %127 = extractelement <4 x float> %126, i32 0 > %128 = extractelement <4 x float> %126, i32 1 > %129 = extractelement <4 x float> %126, i32 2 > %130 = extractelement <4 x float> %126, i32 3 > %131 = fmul float %130, %117 > %132 = fadd float %127, %114 > %133 = fadd float %128, %115 > %134 = fadd float %129, %116 > %135 = bitcast float %79 to i32 > %136 = bitcast float %80 to i32 > %137 = insertelement <2 x i32> undef, i32 %135, i32 0 > %138 = insertelement <2 x i32> %137, i32 %136, i32 1 > %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %140 = extractelement <4 x float> %139, i32 3 > %141 = fmul float %140, %131 > %142 = call float @llvm.AMDGPU.clamp.(float %141, float 0.000000e+00, float 1.000000e+00) > %143 = fmul float %132, %86 > %144 = fmul float %133, %87 > %145 = fmul float %134, %88 > %146 = fmul float %142, %89 > %147 = fmul float %143, %81 > %148 = fmul float %144, %82 > %149 = fmul float %145, %83 > %150 = fmul float %146, %84 > %151 = fmul float %147, %85 > %152 = fmul float %148, %85 > %153 = fmul float %149, %85 > %154 = fmul float %150, %151 > %155 = fmul float %150, %152 > %156 = fmul float %150, %153 > %157 = fmul float %154, %40 > %158 = fmul float %155, %41 > %159 = fadd float %158, %157 > %160 = fmul float %156, 
%42 > %161 = fadd float %159, %160 > %162 = fmul float %161, %25 > %163 = bitcast float %5 to i32 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %163, 10 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %154, 11 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %155, 12 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %156, 13 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %162, 14 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..38] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 320} >IMM[3] UINT32 {336, 352, 368, 592} >IMM[4] UINT32 {400, 496, 608, 560} >IMM[5] UINT32 {576, 384, 416, 528} >IMM[6] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[7] UINT32 {448, 432, 512, 464} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] UINT32 {480, 0, 0, 0} >IMM[10] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[11] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, 
TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, 
TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz 
>168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, 
-TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, TEMP[6].xxxx, 
IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][20], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][21], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][22], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][23], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][37].xyzz >363: DP4 TEMP[5].x, CONST[1][25], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][31].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: MUL TEMP[8].xyz, CONST[1][35].xyzz, CONST[1][36].xyzz >368: MOV TEMP[8].w, CONST[1][35].wwww >369: ABS TEMP[9].x, TEMP[2].xxxx >370: MUL TEMP[0].x, TEMP[9].xxxx, IMM[6].xxxx >371: MIN TEMP[9].x, TEMP[0].xxxx, IMM[0].zzzz >372: ADD TEMP[0].x, -TEMP[9].xxxx, IMM[0].zzzz >373: DP3 TEMP[7].x, CONST[1][24].xyzz, TEMP[3].xyzz >374: DP3 TEMP[9].x, CONST[1][26].xyzz, TEMP[3].xyzz >375: MOV TEMP[7].z, TEMP[9].xxxx >376: DP3 TEMP[3].x, CONST[1][25].xyzz, TEMP[3].xyzz >377: MOV TEMP[7].y, TEMP[3].xxxx >378: DP3 TEMP[9].x, TEMP[7].xyzz, TEMP[7].xyzz >379: RSQ TEMP[9].x, TEMP[9].xxxx >380: MUL TEMP[10].xyz, TEMP[9].xxxx, TEMP[7].xyzz >381: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[9].xxxx, IMM[6].yyyy >382: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >383: MOV_SAT TEMP[3].x, TEMP[3].xxxx >384: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >385: DP3 TEMP[9].x, -TEMP[10].xyzz, CONST[1][33].xyzz >386: FMA TEMP[10].x, -CONST[1][28].yyyy, TEMP[9].xxxx, CONST[1][28].xxxx >387: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].zzzz >388: MUL TEMP[9].x, TEMP[9].xxxx, IMM[6].zzzz >389: ABS TEMP[10].x, TEMP[10].xxxx >390: LG2 TEMP[10].x, TEMP[10].xxxx >391: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].wwww >392: EX2 TEMP[10].x, TEMP[10].xxxx >393: FMA TEMP[11].x, CONST[1][28].zzzz, TEMP[10].xxxx, -CONST[1][27].zzzz >394: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][28].zzzz >395: MAX TEMP[11].x, TEMP[11].xxxx, IMM[8].xxxx >396: FMA TEMP[0].x, -TEMP[11].xxxx, TEMP[0].xxxx, TEMP[10].xxxx >397: MAX TEMP[10].x, TEMP[0].xxxx, CONST[1][32].wwww >398: FSNE TEMP[11].x, CONST[1][27].xxxx, IMM[8].xxxx >399: UIF TEMP[11].xxxx :0 >400: RCP TEMP[11].x, CONST[1][27].xxxx >401: MUL TEMP[11].x, -TEMP[1].xxxx, TEMP[11].xxxx >402: ELSE :0 >403: SSG TEMP[12].x, -TEMP[1].xxxx >404: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >405: ENDIF >406: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].zzzz >407: EX2 TEMP[11].x, TEMP[11].xxxx >408: 
ADD TEMP[11].x, TEMP[11].xxxx, CONST[1][28].wwww >409: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][29].yyyy >410: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].wwww >411: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[11].xxxx >412: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][27].wwww >413: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][29].xxxx >414: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[10].xxxx >415: FSNE TEMP[10].x, CONST[1][30].wwww, IMM[8].xxxx >416: UIF TEMP[10].xxxx :0 >417: RCP TEMP[10].x, CONST[1][30].wwww >418: MUL TEMP[10].x, -TEMP[1].xxxx, TEMP[10].xxxx >419: ELSE :0 >420: SSG TEMP[11].x, -TEMP[1].xxxx >421: MUL TEMP[10].x, IMM[8].yyyy, TEMP[11].xxxx >422: ENDIF >423: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][31].zzzz >424: FSNE TEMP[11].x, CONST[1][27].yyyy, IMM[8].xxxx >425: UIF TEMP[11].xxxx :0 >426: RCP TEMP[11].x, CONST[1][27].yyyy >427: MUL TEMP[11].x, TEMP[1].xxxx, TEMP[11].xxxx >428: ELSE :0 >429: SSG TEMP[12].x, TEMP[1].xxxx >430: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >431: ENDIF >432: MUL TEMP[10].x, TEMP[10].xxxx, IMM[8].zzzz >433: EX2 TEMP[10].x, TEMP[10].xxxx >434: MUL TEMP[7].xyz, TEMP[10].xxxx, CONST[1][30].xyzz >435: FMA TEMP[3].xyz, CONST[1][30].xyzz, TEMP[10].xxxx, TEMP[3].xxxx >436: FMA TEMP[9].xyz, TEMP[7].xyzz, TEMP[9].xxxx, TEMP[0].xxxx >437: MUL TEMP[7].xyz, TEMP[11].xxxx, -TEMP[3].xyzz >438: ABS TEMP[2].xyz, TEMP[2].xxxx >439: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >440: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >441: EX2 TEMP[2].x, TEMP[1].xxxx >442: EX2 TEMP[2].y, TEMP[1].yyyy >443: EX2 TEMP[2].z, TEMP[1].zzzz >444: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[8].zzzz >445: LG2 TEMP[10].x, CONST[1][32].xxxx >446: LG2 TEMP[10].y, CONST[1][32].yyyy >447: LG2 TEMP[10].z, CONST[1][32].zzzz >448: MUL TEMP[4].xyz, TEMP[10].xyzz, IMM[10].xxxx >449: EX2 TEMP[10].x, TEMP[4].xxxx >450: EX2 TEMP[10].y, TEMP[4].yyyy >451: EX2 TEMP[10].z, TEMP[4].zzzz >452: EX2 TEMP[4].x, TEMP[7].xxxx >453: EX2 TEMP[4].y, TEMP[7].yyyy >454: EX2 TEMP[4].z, TEMP[7].zzzz >455: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[10].xyzz >456: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[8].xxxx >457: SSG TEMP[10].xyz, TEMP[9].xyzz >458: MUL TEMP[10].xyz, IMM[8].yyyy, TEMP[10].xyzz >459: RCP TEMP[11].x, TEMP[3].xxxx >460: RCP TEMP[11].y, TEMP[3].yyyy >461: RCP TEMP[11].z, TEMP[3].zzzz >462: MUL TEMP[3].xyz, TEMP[9].xyzz, TEMP[11].xyzz >463: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[10].xyzz, TEMP[3].xyzz >464: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >465: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >466: MOV TEMP[2].w, TEMP[2].xxxx >467: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[10].yyyy >468: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xxxx >469: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[8].wwww >470: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[3].xyzz >471: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[10].wwww >472: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[11].xxxx >473: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[8].xxxx >474: SSG TEMP[4].xyz, TEMP[1].xyzz >475: MUL TEMP[4].xyz, IMM[8].yyyy, TEMP[4].xyzz >476: RCP TEMP[7].x, TEMP[0].xxxx >477: RCP TEMP[7].y, TEMP[0].yyyy >478: RCP TEMP[7].z, TEMP[0].zzzz >479: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >480: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >481: MOV OUT[4], IN[2] >482: MOV OUT[3], TEMP[2] >483: MOV OUT[2], TEMP[8] >484: MOV OUT[1], TEMP[6] >485: MOV OUT[0], TEMP[5] >486: END >radeonsi: Compiling shader 311 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x 
i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 364) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 408) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 412) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 432) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 436) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 488) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 492) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 524) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) 
> %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 560) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 564) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 568) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 572) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 576) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 580) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 584) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 592) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 596) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 600) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %13) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 > %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %14) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %15) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = extractelement <4 x float> %94, i32 2 > %98 = extractelement <4 x float> %94, i32 3 > %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 > %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %16) > %102 = extractelement <4 x float> %101, i32 0 > %103 = extractelement <4 x float> %101, i32 1 > %104 = extractelement <4 x float> %101, i32 2 > %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 > %107 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %17) > %108 = extractelement <4 x float> %107, i32 0 > %109 = extractelement <4 x float> %107, i32 1 > %110 = extractelement <4 x float> %107, i32 2 > %111 = fmul float %110, 0x406FE01000000000 > %112 = fmul float %109, 0x406FE01000000000 > %113 = fmul float %108, 0x406FE01000000000 > %114 = fptosi float %111 to i32 > %115 = fptosi float %112 to i32 > %116 = fptosi float %113 to i32 > %117 = shl i32 %114, 1 > %118 = or i32 %117, 1 > %119 = shl i32 %115, 1 > %120 = or i32 %119, 1 > %121 = shl i32 %116, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %114, 5 > %124 = or i32 %123, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %124) > %126 = fmul float %102, %125 > %127 = shl i32 %115, 5 > %128 = or i32 %127, 4 > %129 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %128) > %130 = fmul float %103, %129 > %131 = shl i32 %118, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %131) > %133 = shl i32 %118, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x 
i8> %80, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %118, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %138) > %140 = shl i32 %118, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %118, 4 > %146 = or i32 %145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %146) > %148 = shl i32 %118, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %102 > %153 = fmul float %144, %102 > %154 = fmul float %153, 2.000000e+00 > %155 = shl i32 %120, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %155) > %157 = shl i32 %120, 4 > %158 = or i32 %157, 12 > %159 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %158) > %160 = fmul float %156, %159 > %161 = shl i32 %120, 4 > %162 = or i32 %161, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %162) > %164 = shl i32 %120, 4 > %165 = or i32 %164, 8 > %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %165) > %167 = fsub float -0.000000e+00, %160 > %168 = call float @llvm.fma.f32(float %163, float %166, float %167) > %169 = shl i32 %120, 4 > %170 = or i32 %169, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %170) > %172 = shl i32 %120, 4 > %173 = or i32 %172, 8 > %174 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %160) > %176 = fmul float %175, %103 > %177 = fmul float %176, 2.000000e+00 > %178 = fmul float %168, %103 > %179 = fmul float %178, 2.000000e+00 > %180 = shl i32 %118, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %181) > %183 = shl i32 %118, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %184) > %186 = shl i32 %118, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %186) > %188 = shl i32 %118, 4 > %189 = or i32 %188, 12 > %190 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %189) > %191 = fmul float %185, %190 > %192 = fmul float %185, %187 > %193 = fmul float %182, %190 > %194 = shl i32 %118, 4 > %195 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %194) > %196 = shl i32 %118, 4 > %197 = or i32 %196, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %197) > %199 = call float @llvm.fma.f32(float %195, float %198, float %191) > %200 = fmul float %199, %102 > %201 = fmul float %200, 2.000000e+00 > %202 = shl i32 %118, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %202) > %204 = shl i32 %118, 4 > %205 = or i32 %204, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %205) > %207 = shl i32 %118, 4 > %208 = or i32 %207, 8 > %209 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %208) > %210 = shl i32 %118, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %210) > %212 = shl i32 %118, 4 > %213 = or i32 %212, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %213) > %215 = shl i32 %118, 4 > %216 = or i32 %215, 8 > %217 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %216) > %218 = fmul float %203, %211 > %219 = fmul float %206, %214 > %220 = fmul float %209, %217 > %221 = fadd float %220, %219 > %222 = fadd float %220, %218 > %223 = fadd float %219, %218 > %224 = fsub float -0.000000e+00, %221 > %225 = call float 
@llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fsub float -0.000000e+00, %222 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %223 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fmul float %102, %227 > %231 = shl i32 %120, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %232) > %234 = shl i32 %120, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %235) > %237 = shl i32 %120, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %237) > %239 = shl i32 %120, 4 > %240 = or i32 %239, 12 > %241 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %240) > %242 = fmul float %236, %241 > %243 = fmul float %236, %238 > %244 = fmul float %233, %241 > %245 = shl i32 %120, 4 > %246 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %245) > %247 = shl i32 %120, 4 > %248 = or i32 %247, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %248) > %250 = call float @llvm.fma.f32(float %246, float %249, float %242) > %251 = fmul float %250, %103 > %252 = fmul float %251, 2.000000e+00 > %253 = shl i32 %120, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %253) > %255 = shl i32 %120, 4 > %256 = or i32 %255, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %256) > %258 = shl i32 %120, 4 > %259 = or i32 %258, 8 > %260 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %259) > %261 = shl i32 %120, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %261) > %263 = shl i32 %120, 4 > %264 = or i32 %263, 4 > %265 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %264) > %266 = shl i32 %120, 4 > %267 = or i32 %266, 8 > %268 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %267) > %269 = fmul float %254, %262 > %270 = fmul float %257, %265 > %271 = fmul float %260, %268 > %272 = fadd float %271, %270 > %273 = fadd float %271, %269 > %274 = fadd float %270, %269 > %275 = fsub float -0.000000e+00, %272 > %276 = call float @llvm.fma.f32(float %275, float 2.000000e+00, float 1.000000e+00) > %277 = fsub float -0.000000e+00, %273 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %274 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fmul float %103, %278 > %282 = fadd float %201, %252 > %283 = fadd float %230, %281 > %284 = fadd float %154, %179 > %285 = fadd float %126, %130 > %286 = shl i32 %116, 5 > %287 = or i32 %286, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %287) > %289 = fmul float %104, %288 > %290 = shl i32 %122, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %290) > %292 = shl i32 %122, 4 > %293 = or i32 %292, 12 > %294 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %293) > %295 = fmul float %291, %294 > %296 = shl i32 %122, 4 > %297 = or i32 %296, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %297) > %299 = shl i32 %122, 4 > %300 = or i32 %299, 8 > %301 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %300) > %302 = fsub float -0.000000e+00, %295 > %303 = call float @llvm.fma.f32(float %298, float %301, float %302) > %304 = shl i32 %122, 4 > %305 = or i32 %304, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %305) > %307 = shl i32 %122, 4 > %308 = or i32 %307, 8 > %309 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %308) > %310 = call float 
@llvm.fma.f32(float %306, float %309, float %295) > %311 = fmul float %310, %104 > %312 = fmul float %311, 2.000000e+00 > %313 = fmul float %303, %104 > %314 = fmul float %313, 2.000000e+00 > %315 = shl i32 %122, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %316) > %318 = shl i32 %122, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %319) > %321 = shl i32 %122, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %321) > %323 = shl i32 %122, 4 > %324 = or i32 %323, 12 > %325 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %324) > %326 = fmul float %320, %325 > %327 = fmul float %320, %322 > %328 = fmul float %317, %325 > %329 = shl i32 %122, 4 > %330 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %329) > %331 = shl i32 %122, 4 > %332 = or i32 %331, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %332) > %334 = call float @llvm.fma.f32(float %330, float %333, float %326) > %335 = fmul float %334, %104 > %336 = fmul float %335, 2.000000e+00 > %337 = shl i32 %122, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %337) > %339 = shl i32 %122, 4 > %340 = or i32 %339, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %340) > %342 = shl i32 %122, 4 > %343 = or i32 %342, 8 > %344 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %343) > %345 = shl i32 %122, 4 > %346 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %345) > %347 = shl i32 %122, 4 > %348 = or i32 %347, 4 > %349 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %348) > %350 = shl i32 %122, 4 > %351 = or i32 %350, 8 > %352 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %351) > %353 = fmul float %338, %346 > %354 = fmul float %341, %349 > %355 = fmul float %344, %352 > %356 = fadd float %355, %354 > %357 = fadd float %355, %353 > %358 = fadd float %354, %353 > %359 = fsub float -0.000000e+00, %356 > %360 = call float @llvm.fma.f32(float %359, float 2.000000e+00, float 1.000000e+00) > %361 = fsub float -0.000000e+00, %357 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %358 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fmul float %104, %362 > %366 = fadd float %282, %336 > %367 = fadd float %283, %365 > %368 = fadd float %284, %314 > %369 = fadd float %285, %289 > %370 = fmul float %366, %84 > %371 = fmul float %367, %85 > %372 = fadd float %370, %371 > %373 = fmul float %368, %86 > %374 = fadd float %372, %373 > %375 = fadd float %374, %369 > %376 = shl i32 %118, 4 > %377 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %376) > %378 = shl i32 %118, 4 > %379 = or i32 %378, 8 > %380 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %379) > %381 = fsub float -0.000000e+00, %193 > %382 = call float @llvm.fma.f32(float %377, float %380, float %381) > %383 = fmul float %382, %102 > %384 = fmul float %383, 2.000000e+00 > %385 = fmul float %152, 2.000000e+00 > %386 = shl i32 %120, 4 > %387 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %386) > %388 = shl i32 %120, 4 > %389 = or i32 %388, 8 > %390 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %389) > %391 = fsub float -0.000000e+00, %244 > %392 = call float @llvm.fma.f32(float %387, float %390, float %391) > %393 = fmul float %392, %103 > %394 = fmul float %393, 2.000000e+00 > %395 = fmul float %102, %229 > %396 = fmul float %102, %225 > %397 = fmul float %103, %280 > %398 = fmul float %103, %276 > %399 = shl i32 %114, 5 
> %400 = or i32 %399, 8 > %401 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %400) > %402 = fmul float %102, %401 > %403 = shl i32 %115, 5 > %404 = or i32 %403, 8 > %405 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %404) > %406 = fmul float %103, %405 > %407 = fadd float %394, %384 > %408 = fadd float %177, %385 > %409 = fadd float %397, %395 > %410 = fadd float %406, %402 > %411 = shl i32 %122, 4 > %412 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %411) > %413 = shl i32 %122, 4 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %414) > %416 = fsub float -0.000000e+00, %328 > %417 = call float @llvm.fma.f32(float %412, float %415, float %416) > %418 = fmul float %417, %104 > %419 = fmul float %418, 2.000000e+00 > %420 = fmul float %104, %364 > %421 = fmul float %104, %360 > %422 = shl i32 %116, 5 > %423 = or i32 %422, 8 > %424 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %423) > %425 = fmul float %104, %424 > %426 = fadd float %407, %419 > %427 = fadd float %408, %312 > %428 = fadd float %409, %420 > %429 = fadd float %410, %425 > %430 = fmul float %426, %84 > %431 = fmul float %427, %85 > %432 = fadd float %430, %431 > %433 = fmul float %428, %86 > %434 = fadd float %432, %433 > %435 = fadd float %434, %429 > %436 = shl i32 %114, 5 > %437 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %436) > %438 = fmul float %102, %437 > %439 = shl i32 %115, 5 > %440 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %439) > %441 = fmul float %103, %440 > %442 = shl i32 %116, 5 > %443 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %442) > %444 = fmul float %104, %443 > %445 = shl i32 %118, 4 > %446 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %445) > %447 = shl i32 %118, 4 > %448 = or i32 %447, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %448) > %450 = fsub float -0.000000e+00, %191 > %451 = call float @llvm.fma.f32(float %446, float %449, float %450) > %452 = fadd float %193, %192 > %453 = fmul float %451, %102 > %454 = fmul float %452, %102 > %455 = fmul float %453, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = shl i32 %120, 4 > %458 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %457) > %459 = shl i32 %120, 4 > %460 = or i32 %459, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %460) > %462 = fsub float -0.000000e+00, %242 > %463 = call float @llvm.fma.f32(float %458, float %461, float %462) > %464 = shl i32 %122, 4 > %465 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %464) > %466 = shl i32 %122, 4 > %467 = or i32 %466, 4 > %468 = call float @llvm.SI.load.const(<16 x i8> %80, i32 %467) > %469 = fsub float -0.000000e+00, %326 > %470 = call float @llvm.fma.f32(float %465, float %468, float %469) > %471 = fadd float %328, %327 > %472 = fmul float %463, %103 > %473 = fmul float %470, %104 > %474 = fmul float %471, %104 > %475 = fmul float %473, 2.000000e+00 > %476 = fmul float %474, 2.000000e+00 > %477 = fadd float %244, %243 > %478 = fmul float %477, %103 > %479 = fmul float %472, 2.000000e+00 > %480 = fmul float %478, 2.000000e+00 > %481 = fadd float %396, %398 > %482 = fadd float %455, %479 > %483 = fadd float %456, %480 > %484 = fadd float %438, %441 > %485 = fadd float %421, %481 > %486 = fadd float %475, %482 > %487 = fadd float %476, %483 > %488 = fadd float %444, %484 > %489 = fmul float %485, %84 > %490 = fmul float %486, %85 > %491 = fadd float %489, %490 > %492 = fmul float %487, %86 > %493 = fadd float %491, %492 > %494 = fadd float %493, %488 > %495 = fmul 
float %20, %494 > %496 = fmul float %21, %375 > %497 = fadd float %495, %496 > %498 = fmul float %22, %435 > %499 = fadd float %497, %498 > %500 = fadd float %499, %23 > %501 = fmul float %24, %494 > %502 = fmul float %25, %375 > %503 = fadd float %501, %502 > %504 = fmul float %26, %435 > %505 = fadd float %503, %504 > %506 = fadd float %505, %27 > %507 = fmul float %28, %494 > %508 = fmul float %29, %375 > %509 = fadd float %507, %508 > %510 = fmul float %30, %435 > %511 = fadd float %509, %510 > %512 = fadd float %511, %31 > %513 = fmul float %32, %494 > %514 = fmul float %33, %375 > %515 = fadd float %513, %514 > %516 = fmul float %34, %435 > %517 = fadd float %515, %516 > %518 = fadd float %517, %35 > %519 = fsub float %76, %494 > %520 = fsub float %77, %375 > %521 = fsub float %78, %435 > %522 = fmul float %39, %494 > %523 = fmul float %40, %375 > %524 = fadd float %522, %523 > %525 = fmul float %41, %435 > %526 = fadd float %524, %525 > %527 = fadd float %526, %42 > %528 = fadd float %527, %61 > %529 = fmul float %69, %73 > %530 = fmul float %70, %74 > %531 = fmul float %71, %75 > %532 = call float @llvm.fabs.f32(float %518) > %533 = fmul float %532, 0x3EF4F8B580000000 > %534 = call float @llvm.minnum.f32(float %533, float 1.000000e+00) > %535 = fsub float 1.000000e+00, %534 > %536 = fmul float %36, %519 > %537 = fmul float %37, %520 > %538 = fadd float %537, %536 > %539 = fmul float %38, %521 > %540 = fadd float %538, %539 > %541 = fmul float %43, %519 > %542 = fmul float %44, %520 > %543 = fadd float %542, %541 > %544 = fmul float %45, %521 > %545 = fadd float %543, %544 > %546 = fmul float %39, %519 > %547 = fmul float %40, %520 > %548 = fadd float %547, %546 > %549 = fmul float %41, %521 > %550 = fadd float %548, %549 > %551 = fmul float %540, %540 > %552 = fmul float %550, %550 > %553 = fadd float %552, %551 > %554 = fmul float %545, %545 > %555 = fadd float %553, %554 > %556 = call float @llvm.AMDGPU.rsq.clamped.f32(float %555) > %557 = fmul float %556, %540 > %558 = fmul float %556, %550 > %559 = fmul float %556, %545 > %560 = fsub float -0.000000e+00, %550 > %561 = call float @llvm.fma.f32(float %560, float %556, float 0xBFC3333340000000) > %562 = fsub float 1.000000e+00, %561 > %563 = call float @llvm.AMDGPU.clamp.(float %562, float 0.000000e+00, float 1.000000e+00) > %564 = fmul float %563, %563 > %565 = fmul float %557, %66 > %566 = fsub float -0.000000e+00, %565 > %567 = fmul float %558, %67 > %568 = fsub float %566, %567 > %569 = fmul float %559, %68 > %570 = fsub float %568, %569 > %571 = fsub float -0.000000e+00, %51 > %572 = call float @llvm.fma.f32(float %571, float %570, float %50) > %573 = call float @llvm.fma.f32(float %570, float %570, float 1.000000e+00) > %574 = fmul float %573, 0x3FAE8EC8A0000000 > %575 = call float @llvm.fabs.f32(float %572) > %576 = call float @llvm.log2.f32(float %575) > %577 = fmul float %576, -1.500000e+00 > %578 = call float @llvm.exp2.f32(float %577) > %579 = fsub float -0.000000e+00, %48 > %580 = call float @llvm.fma.f32(float %52, float %578, float %579) > %581 = fmul float %578, %52 > %582 = call float @llvm.maxnum.f32(float %580, float 0.000000e+00) > %583 = fsub float -0.000000e+00, %582 > %584 = call float @llvm.fma.f32(float %583, float %535, float %581) > %585 = call float @llvm.maxnum.f32(float %584, float %65) > %586 = fcmp une float %46, 0.000000e+00 > br i1 %586, label %IF, label %ELSE > >IF: ; preds = %main_body > %587 = fdiv float 1.000000e+00, %46 > %588 = fmul float %528, %587 > %589 = fsub float -0.000000e+00, %588 > 
br label %ENDIF > >ELSE: ; preds = %main_body > %590 = fsub float -0.000000e+00, %528 > %591 = fcmp olt float %528, -0.000000e+00 > %592 = select i1 %591, float 1.000000e+00, float %590 > %593 = fcmp oge float %592, 0.000000e+00 > %.op = fmul float %592, 0x4600000000000000 > %594 = select i1 %593, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %589, %IF ], [ %594, %ELSE ] > %595 = fmul float %temp44.0, 0x3FF7154760000000 > %596 = call float @llvm.exp2.f32(float %595) > %597 = fadd float %596, %53 > %598 = fmul float %597, %55 > %599 = fmul float %598, 5.000000e-01 > %600 = fmul float %564, %599 > %601 = call float @llvm.minnum.f32(float %600, float %49) > %602 = call float @llvm.maxnum.f32(float %601, float %54) > %603 = fmul float %602, %585 > %604 = fcmp une float %59, 0.000000e+00 > br i1 %604, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %605 = fdiv float 1.000000e+00, %59 > %606 = fmul float %528, %605 > %607 = fsub float -0.000000e+00, %606 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %608 = fsub float -0.000000e+00, %528 > %609 = fcmp olt float %528, -0.000000e+00 > %610 = select i1 %609, float 1.000000e+00, float %608 > %611 = fcmp oge float %610, 0.000000e+00 > %.op164 = fmul float %610, 0x4600000000000000 > %612 = select i1 %611, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp40.0 = phi float [ %607, %IF159 ], [ %612, %ELSE160 ] > %613 = fsub float %60, %528 > %614 = fcmp une float %47, 0.000000e+00 > br i1 %614, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %615 = fdiv float 1.000000e+00, %47 > %616 = fmul float %613, %615 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %617 = fcmp ogt float %613, 0.000000e+00 > %618 = select i1 %617, float 1.000000e+00, float %613 > %619 = fcmp oge float %618, 0.000000e+00 > %.op165 = fmul float %618, 0x4600000000000000 > %620 = select i1 %619, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp44.1 = phi float [ %616, %IF162 ], [ %620, %ELSE163 ] > %621 = fmul float %temp40.0, 0x3FF7154760000000 > %622 = call float @llvm.exp2.f32(float %621) > %623 = fmul float %622, %56 > %624 = fmul float %622, %57 > %625 = fmul float %622, %58 > %626 = call float @llvm.fma.f32(float %56, float %622, float %602) > %627 = call float @llvm.fma.f32(float %57, float %622, float %602) > %628 = call float @llvm.fma.f32(float %58, float %622, float %602) > %629 = call float @llvm.fma.f32(float %623, float %574, float %603) > %630 = call float @llvm.fma.f32(float %624, float %574, float %603) > %631 = call float @llvm.fma.f32(float %625, float %574, float %603) > %632 = fmul float %626, %temp44.1 > %633 = fmul float %627, %temp44.1 > %634 = fmul float %628, %temp44.1 > %635 = call float @llvm.fabs.f32(float %518) > %636 = call float @llvm.fabs.f32(float %518) > %637 = call float @llvm.fabs.f32(float %518) > %638 = fmul float %626, %635 > %639 = fmul float %627, %636 > %640 = fmul float %628, %637 > %641 = fmul float %638, 0xBFF7154760000000 > %642 = fmul float %639, 0xBFF7154760000000 > %643 = fmul float %640, 0xBFF7154760000000 > %644 = call float @llvm.exp2.f32(float %641) > %645 = call float @llvm.exp2.f32(float %642) > %646 = call float @llvm.exp2.f32(float %643) > %647 = fmul float %632, 0xBFF7154760000000 > %648 = fmul float %633, 0xBFF7154760000000 > %649 = fmul float %634, 0xBFF7154760000000 > %650 = call float @llvm.log2.f32(float %62) 
> %651 = call float @llvm.log2.f32(float %63) > %652 = call float @llvm.log2.f32(float %64) > %653 = fmul float %650, 0x3FDD1745E0000000 > %654 = fmul float %651, 0x3FDD1745E0000000 > %655 = fmul float %652, 0x3FDD1745E0000000 > %656 = call float @llvm.exp2.f32(float %653) > %657 = call float @llvm.exp2.f32(float %654) > %658 = call float @llvm.exp2.f32(float %655) > %659 = call float @llvm.exp2.f32(float %647) > %660 = call float @llvm.exp2.f32(float %648) > %661 = call float @llvm.exp2.f32(float %649) > %662 = fmul float %659, %656 > %663 = fmul float %660, %657 > %664 = fmul float %661, %658 > %665 = fcmp oeq float %626, 0.000000e+00 > %666 = fcmp oeq float %627, 0.000000e+00 > %667 = fcmp oeq float %628, 0.000000e+00 > %668 = fcmp ogt float %629, 0.000000e+00 > %669 = select i1 %668, float 1.000000e+00, float %629 > %670 = fcmp oge float %669, 0.000000e+00 > %671 = fcmp ogt float %630, 0.000000e+00 > %672 = select i1 %671, float 1.000000e+00, float %630 > %673 = fcmp oge float %672, 0.000000e+00 > %674 = fcmp ogt float %631, 0.000000e+00 > %675 = select i1 %674, float 1.000000e+00, float %631 > %676 = fcmp oge float %675, 0.000000e+00 > %.op166 = fmul float %669, 0x4600000000000000 > %677 = select i1 %670, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %672, 0x4600000000000000 > %678 = select i1 %673, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %675, 0x4600000000000000 > %679 = select i1 %676, float %.op168, float 0xC600000000000000 > %680 = fdiv float 1.000000e+00, %626 > %681 = fdiv float 1.000000e+00, %627 > %682 = fdiv float 1.000000e+00, %628 > %683 = fmul float %629, %680 > %684 = fmul float %630, %681 > %685 = fmul float %631, %682 > %686 = select i1 %665, float %677, float %683 > %687 = select i1 %666, float %678, float %684 > %688 = select i1 %667, float %679, float %685 > %689 = fmul float %686, %662 > %690 = fmul float %687, %663 > %691 = fmul float %688, %664 > %692 = fsub float 1.000000e+00, %644 > %693 = fsub float 1.000000e+00, %645 > %694 = fsub float 1.000000e+00, %646 > %695 = call float @llvm.fma.f32(float %689, float %692, float 0xBF70624DE0000000) > %696 = call float @llvm.fma.f32(float %690, float %693, float 0xBF70624DE0000000) > %697 = call float @llvm.fma.f32(float %691, float %694, float 0xBF70624DE0000000) > %698 = call float @llvm.maxnum.f32(float %695, float 0.000000e+00) > %699 = call float @llvm.maxnum.f32(float %696, float 0.000000e+00) > %700 = call float @llvm.maxnum.f32(float %697, float 0.000000e+00) > %701 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 5.000000e-01) > %702 = call float @llvm.fma.f32(float %699, float 0x4018CCCCC0000000, float 5.000000e-01) > %703 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 5.000000e-01) > %704 = fmul float %698, %701 > %705 = fmul float %699, %702 > %706 = fmul float %700, %703 > %707 = call float @llvm.fma.f32(float %698, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %708 = call float @llvm.fma.f32(float %699, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %709 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %710 = call float @llvm.fma.f32(float %698, float %707, float 0x3FAEB851E0000000) > %711 = call float @llvm.fma.f32(float %699, float %708, float 0x3FAEB851E0000000) > %712 = call float @llvm.fma.f32(float %700, float %709, float 0x3FAEB851E0000000) > %713 = fcmp oeq float %710, 0.000000e+00 > %714 = fcmp oeq float %711, 0.000000e+00 > %715 = fcmp oeq float %712, 
0.000000e+00 > %716 = fcmp ogt float %704, 0.000000e+00 > %717 = select i1 %716, float 1.000000e+00, float %704 > %718 = fcmp oge float %717, 0.000000e+00 > %719 = fcmp ogt float %705, 0.000000e+00 > %720 = select i1 %719, float 1.000000e+00, float %705 > %721 = fcmp oge float %720, 0.000000e+00 > %722 = fcmp ogt float %706, 0.000000e+00 > %723 = select i1 %722, float 1.000000e+00, float %706 > %724 = fcmp oge float %723, 0.000000e+00 > %.op169 = fmul float %717, 0x4600000000000000 > %725 = select i1 %718, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %720, 0x4600000000000000 > %726 = select i1 %721, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %723, 0x4600000000000000 > %727 = select i1 %724, float %.op171, float 0xC600000000000000 > %728 = fdiv float 1.000000e+00, %710 > %729 = fdiv float 1.000000e+00, %711 > %730 = fdiv float 1.000000e+00, %712 > %731 = fmul float %704, %728 > %732 = fmul float %705, %729 > %733 = fmul float %706, %730 > %734 = select i1 %713, float %725, float %731 > %735 = select i1 %714, float %726, float %732 > %736 = select i1 %715, float %727, float %733 > %737 = bitcast i32 %11 to float > %738 = insertvalue <{ float, float, float }> undef, float %737, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %90, float %91, float %424, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %529, float %530, float %531, float %72) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %734, float %735, float %736, float %644) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %96, float %97, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %500, float %506, float %512, float %518) > ret <{ float, float, float }> %738 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..34] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 304, 
256, 240} >IMM[1] UINT32 {272, 288, 544, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} > 0: MUL TEMP[0].x, CONST[1][19].yyyy, CONST[1][16].wwww > 1: MUL TEMP[1].x, CONST[1][19].yyyy, CONST[1][15].wwww > 2: MUL TEMP[2].x, IN[3].xxxx, CONST[1][17].xxxx > 3: MOV TEMP[0].y, TEMP[2].xxxx > 4: MUL TEMP[2].x, IN[3].yyyy, CONST[1][16].xxxx > 5: MOV TEMP[1].y, TEMP[2].xxxx > 6: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][18].zwww, TEMP[0].xyyy > 7: MOV TEMP[0].xy, TEMP[0].xyyy > 8: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 9: FMA TEMP[0].xy, TEMP[0].ywww, IMM[2].xxxx, IMM[2].yyyy > 10: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1][17].yyyy > 11: FMA TEMP[0].xy, TEMP[0].xyyy, IN[3].zzzz, IN[0].xyyy > 12: FMA TEMP[1].xy, TEMP[0].xyyy, CONST[1][17].zwww, TEMP[1].xyyy > 13: MOV TEMP[2].xy, TEMP[1].xyyy > 14: TEX TEMP[2], TEMP[2], SAMP[1], 2D > 15: MUL TEMP[1].xy, CONST[1][16].yzzz, CONST[1][19].yyyy > 16: FMA TEMP[1].xy, TEMP[0].xyyy, CONST[1][18].xyyy, TEMP[1].xyyy > 17: MOV TEMP[0].xy, TEMP[1].xyyy > 18: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 19: ADD TEMP[1].xyz, -TEMP[2].xyzz, TEMP[0].xyzz > 20: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].wwww > 21: MOV_SAT TEMP[0].x, TEMP[0].xxxx > 22: MOV TEMP[0].w, TEMP[0].xxxx > 23: FMA TEMP[0].xyz, CONST[1][19].xxxx, TEMP[1].xyzz, TEMP[2].xyzz > 24: MOV TEMP[1].x, IMM[2].zzzz > 25: MOV TEMP[1].w, IN[3].wwww > 26: MUL TEMP[1], TEMP[1].xxxw, TEMP[0] > 27: MUL TEMP[1], TEMP[1], IN[1] > 28: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[2].wwww > 29: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1].xyzz > 30: DP3 TEMP[0].x, TEMP[1].xyzz, CONST[1][34].xyzz > 31: MOV TEMP[1].xyz, TEMP[1].xyzx > 32: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][15].zzzz > 33: MOV TEMP[1].w, TEMP[0].xxxx > 34: MOV OUT[0], TEMP[1] > 35: END >radeonsi: Compiling shader 312 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %40 = call float @llvm.SI.load.const(<16 x 
i8> %24, i32 308) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 544) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 548) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 552) > %44 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 > %46 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %47 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %46, i64 0, i64 3 > %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 > %49 = extractelement <8 x i32> %45, i32 7 > %50 = extractelement <4 x i32> %48, i32 0 > %51 = and i32 %50, %49 > %52 = insertelement <4 x i32> %48, i32 %51, i32 0 > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 7 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 11 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %82 = fmul float %40, %30 > %83 = fmul float %40, %26 > %84 = fmul float %78, %31 > %85 = fmul float %79, %27 > %86 = call float @llvm.fma.f32(float %71, float %37, float %82) > %87 = call float @llvm.fma.f32(float %72, float %38, float %84) > %88 = bitcast float %86 to i32 > %89 = bitcast float %87 to i32 > %90 = insertelement <2 x i32> undef, i32 %88, i32 0 > %91 = insertelement <2 x i32> %90, i32 %89, i32 1 > %92 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %91, <8 x i32> %45, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %93 = extractelement <4 x float> %92, i32 1 > %94 = extractelement <4 x float> %92, i32 3 > %95 = call float @llvm.fma.f32(float %93, float 2.000000e+00, float -1.000000e+00) > %96 = call float @llvm.fma.f32(float %94, float 2.000000e+00, float 
-1.000000e+00) > %97 = fmul float %95, %32 > %98 = fmul float %96, %32 > %99 = call float @llvm.fma.f32(float %97, float %80, float %71) > %100 = call float @llvm.fma.f32(float %98, float %80, float %72) > %101 = call float @llvm.fma.f32(float %99, float %33, float %83) > %102 = call float @llvm.fma.f32(float %100, float %34, float %85) > %103 = bitcast float %101 to i32 > %104 = bitcast float %102 to i32 > %105 = insertelement <2 x i32> undef, i32 %103, i32 0 > %106 = insertelement <2 x i32> %105, i32 %104, i32 1 > %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %108 = extractelement <4 x float> %107, i32 0 > %109 = extractelement <4 x float> %107, i32 1 > %110 = extractelement <4 x float> %107, i32 2 > %111 = extractelement <4 x float> %107, i32 3 > %112 = fmul float %28, %40 > %113 = fmul float %29, %40 > %114 = call float @llvm.fma.f32(float %99, float %35, float %112) > %115 = call float @llvm.fma.f32(float %100, float %36, float %113) > %116 = bitcast float %114 to i32 > %117 = bitcast float %115 to i32 > %118 = insertelement <2 x i32> undef, i32 %116, i32 0 > %119 = insertelement <2 x i32> %118, i32 %117, i32 1 > %120 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %119, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %121 = extractelement <4 x float> %120, i32 0 > %122 = extractelement <4 x float> %120, i32 1 > %123 = extractelement <4 x float> %120, i32 2 > %124 = extractelement <4 x float> %120, i32 3 > %125 = fsub float %121, %108 > %126 = fsub float %122, %109 > %127 = fsub float %123, %110 > %128 = fmul float %124, %111 > %129 = call float @llvm.AMDGPU.clamp.(float %128, float 0.000000e+00, float 1.000000e+00) > %130 = call float @llvm.fma.f32(float %39, float %125, float %108) > %131 = call float @llvm.fma.f32(float %39, float %126, float %109) > %132 = call float @llvm.fma.f32(float %39, float %127, float %110) > %133 = fmul float %81, %129 > %134 = fmul float %130, %73 > %135 = fmul float %131, %74 > %136 = fmul float %132, %75 > %137 = fmul float %133, %76 > %138 = fmul float %134, %77 > %139 = fmul float %135, %77 > %140 = fmul float %136, %77 > %141 = fmul float %137, %138 > %142 = fmul float %137, %139 > %143 = fmul float %137, %140 > %144 = fmul float %141, %41 > %145 = fmul float %142, %42 > %146 = fadd float %145, %144 > %147 = fmul float %143, %43 > %148 = fadd float %146, %147 > %149 = fmul float %148, %25 > %150 = bitcast float %5 to i32 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %150, 10 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151, float %141, 11 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %142, 12 > %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %143, 13 > %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float 
%149, 14 > %156 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %156 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..39] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 368, 240, 336} >IMM[2] UINT32 {352, 384, 416, 512} >IMM[3] UINT32 {624, 608, 576, 592} >IMM[4] UINT32 {400, 432, 544, 464} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {448, 528, 480, 496} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][23], TEMP[0] > 3: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 4: MOV TEMP[1].z, TEMP[1].xxxx > 5: DP4 TEMP[1].x, CONST[1][21], TEMP[0] > 6: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 7: MOV TEMP[1].y, TEMP[2].xxxx > 8: DP4 TEMP[2].x, CONST[1][24], TEMP[0] > 9: MOV TEMP[1].w, TEMP[2].xxxx > 10: DP4 TEMP[3].x, CONST[1][26], TEMP[0] > 11: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][32].wwww > 12: MOV TEMP[3], TEMP[1] > 13: MOV TEMP[4].xy, IN[1].xyxx > 14: MUL TEMP[5].xy, CONST[1][39].xyyy, IMM[0].xyyy > 15: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][39].xyyy > 16: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 17: MOV TEMP[5].zw, TEMP[1].wwzw > 18: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][38].xyzz > 19: MUL TEMP[8].xyz, CONST[1][36].xyzz, CONST[1][37].xyzz > 20: MOV TEMP[8].w, CONST[1][36].wwww > 21: DP3 TEMP[1].x, CONST[1][25].xyzz, TEMP[7].xyzz > 22: DP3 TEMP[9].x, CONST[1][27].xyzz, TEMP[7].xyzz > 23: MOV TEMP[1].z, TEMP[9].xxxx > 24: DP3 TEMP[7].x, CONST[1][26].xyzz, TEMP[7].xyzz > 25: MOV TEMP[1].y, TEMP[7].xxxx > 26: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 27: RSQ TEMP[9].x, TEMP[9].xxxx > 28: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 29: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 30: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 31: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 33: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][34].xyzz > 34: FMA TEMP[10].x, -CONST[1][29].yyyy, TEMP[9].xxxx, CONST[1][29].xxxx > 35: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 36: MOV TEMP[0].z, TEMP[9].xxxx > 37: ABS TEMP[9].x, 
TEMP[10].xxxx > 38: LG2 TEMP[9].x, TEMP[9].xxxx > 39: MOV TEMP[0].w, TEMP[9].xxxx > 40: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 41: EX2 TEMP[10].x, TEMP[9].yyyy > 42: FMA TEMP[1].x, CONST[1][29].zzzz, TEMP[10].xxxx, -CONST[1][28].zzzz > 43: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][29].zzzz > 44: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 45: ABS TEMP[12].x, TEMP[2].xxxx > 46: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 47: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 48: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 49: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 50: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][33].wwww > 51: FSNE TEMP[11].x, CONST[1][28].xxxx, IMM[0].wwww > 52: UIF TEMP[11].xxxx :0 > 53: RCP TEMP[11].x, CONST[1][28].xxxx > 54: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 55: ELSE :0 > 56: SSG TEMP[12].x, -TEMP[0].xxxx > 57: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 58: ENDIF > 59: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 60: EX2 TEMP[11].x, TEMP[1].xxxx > 61: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][29].wwww > 62: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][30].yyyy > 63: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 64: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 65: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][28].wwww > 66: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][30].xxxx > 67: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 68: FSNE TEMP[11].x, CONST[1][31].wwww, IMM[0].wwww > 69: UIF TEMP[11].xxxx :0 > 70: RCP TEMP[11].x, CONST[1][31].wwww > 71: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 72: ELSE :0 > 73: SSG TEMP[12].x, -TEMP[0].xxxx > 74: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 75: ENDIF > 76: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][32].zzzz > 77: FSNE TEMP[12].x, CONST[1][28].yyyy, IMM[0].wwww > 78: UIF TEMP[12].xxxx :0 > 79: RCP TEMP[12].x, CONST[1][28].yyyy > 80: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 81: ELSE :0 > 82: SSG TEMP[13].x, TEMP[0].xxxx > 83: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 84: ENDIF > 85: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 86: EX2 TEMP[11].x, TEMP[1].xxxx > 87: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][31].xyzz > 88: FMA TEMP[7].xyz, CONST[1][31].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 89: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 90: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 91: SSG TEMP[11].xyz, TEMP[9].xyzz > 92: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 93: RCP TEMP[13].x, TEMP[7].xxxx > 94: RCP TEMP[13].y, TEMP[7].yyyy > 95: RCP TEMP[13].z, TEMP[7].zzzz > 96: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 97: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 98: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 99: ABS TEMP[2].xyz, TEMP[2].xxxx >100: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >101: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >102: EX2 TEMP[2].x, TEMP[1].xxxx >103: EX2 TEMP[2].y, TEMP[1].yyyy >104: EX2 TEMP[2].z, TEMP[1].zzzz >105: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >106: LG2 TEMP[7].x, CONST[1][33].xxxx >107: LG2 TEMP[7].y, CONST[1][33].yyyy >108: LG2 TEMP[7].z, CONST[1][33].zzzz >109: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >110: EX2 TEMP[10].x, TEMP[7].xxxx >111: EX2 TEMP[10].y, TEMP[7].yyyy >112: EX2 TEMP[10].z, TEMP[7].zzzz >113: EX2 TEMP[7].x, TEMP[6].xxxx >114: EX2 TEMP[7].y, TEMP[6].yyyy >115: EX2 TEMP[7].z, TEMP[6].zzzz >116: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >117: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >118: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >119: MOV TEMP[2].w, TEMP[2].xxxx >120: FMA TEMP[0].xyz, 
TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >121: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >122: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >123: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >124: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >125: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >126: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >127: SSG TEMP[7].xyz, TEMP[1].xyzz >128: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >129: RCP TEMP[9].x, TEMP[0].xxxx >130: RCP TEMP[9].y, TEMP[0].yyyy >131: RCP TEMP[9].z, TEMP[0].zzzz >132: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >133: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >134: MOV OUT[5], IN[2] >135: MOV OUT[4], TEMP[2] >136: MOV OUT[3], TEMP[8] >137: MOV OUT[2], TEMP[5] >138: MOV OUT[1], TEMP[4] >139: MOV OUT[0], TEMP[3] >140: END >radeonsi: Compiling shader 313 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 248) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %50 = call float @llvm.SI.load.const(<16 
x i8> %17, i32 468) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 540) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 588) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 600) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 608) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 612) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 616) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 624) > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 628) > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %13) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 > %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %14) > %89 = extractelement <4 x float> %88, i32 0 > %90 = extractelement <4 x float> %88, i32 1 > %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 > %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %15) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = extractelement <4 x float> %93, i32 3 > %98 = fmul float %27, %83 > %99 = fmul float %28, %84 > %100 = fadd float %98, %99 > %101 = fmul float %29, %85 > %102 = fadd float %100, %101 > %103 = fadd float %102, %30 > %104 = fmul float %103, %18 > %105 = fmul float %19, %83 > %106 = fmul float %20, %84 > %107 = fadd float %105, %106 > %108 = fmul float %21, %85 > %109 = fadd float %107, %108 > %110 = fadd float %109, %22 > %111 = fmul float %23, %83 > %112 = fmul float %24, %84 > %113 = fadd float %111, %112 > %114 = fmul float %25, %85 > %115 = fadd float %113, %114 > %116 = fadd float %115, %26 > %117 = fmul 
float %31, %83 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fmul float %33, %85 > %121 = fadd float %119, %120 > %122 = fadd float %121, %34 > %123 = fmul float %38, %83 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fmul float %40, %85 > %127 = fadd float %125, %126 > %128 = fadd float %127, %41 > %129 = fadd float %128, %60 > %130 = fsub float -0.000000e+00, %79 > %131 = fmul float %122, %78 > %132 = fmul float %122, %79 > %133 = call float @llvm.fma.f32(float %110, float %78, float %131) > %134 = call float @llvm.fma.f32(float %116, float %130, float %132) > %135 = fsub float %75, %83 > %136 = fsub float %76, %84 > %137 = fsub float %77, %85 > %138 = fmul float %68, %72 > %139 = fmul float %69, %73 > %140 = fmul float %70, %74 > %141 = fmul float %35, %135 > %142 = fmul float %36, %136 > %143 = fadd float %142, %141 > %144 = fmul float %37, %137 > %145 = fadd float %143, %144 > %146 = fmul float %42, %135 > %147 = fmul float %43, %136 > %148 = fadd float %147, %146 > %149 = fmul float %44, %137 > %150 = fadd float %148, %149 > %151 = fmul float %38, %135 > %152 = fmul float %39, %136 > %153 = fadd float %152, %151 > %154 = fmul float %40, %137 > %155 = fadd float %153, %154 > %156 = fmul float %145, %145 > %157 = fmul float %155, %155 > %158 = fadd float %157, %156 > %159 = fmul float %150, %150 > %160 = fadd float %158, %159 > %161 = call float @llvm.AMDGPU.rsq.clamped.f32(float %160) > %162 = fmul float %161, %145 > %163 = fmul float %161, %155 > %164 = fmul float %161, %150 > %165 = fsub float -0.000000e+00, %155 > %166 = call float @llvm.fma.f32(float %165, float %161, float 0xBFC3333340000000) > %167 = fsub float 1.000000e+00, %166 > %168 = call float @llvm.AMDGPU.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) > %169 = fmul float %168, %168 > %170 = fmul float %162, %65 > %171 = fsub float -0.000000e+00, %170 > %172 = fmul float %163, %66 > %173 = fsub float %171, %172 > %174 = fmul float %164, %67 > %175 = fsub float %173, %174 > %176 = fsub float -0.000000e+00, %50 > %177 = call float @llvm.fma.f32(float %176, float %175, float %49) > %178 = call float @llvm.fma.f32(float %175, float %175, float 1.000000e+00) > %179 = call float @llvm.fabs.f32(float %177) > %180 = call float @llvm.log2.f32(float %179) > %181 = fmul float %178, 0x3FAE8EC8A0000000 > %182 = fmul float %180, -1.500000e+00 > %183 = call float @llvm.exp2.f32(float %182) > %184 = fsub float -0.000000e+00, %47 > %185 = call float @llvm.fma.f32(float %51, float %183, float %184) > %186 = fmul float %183, %51 > %187 = call float @llvm.maxnum.f32(float %185, float 0.000000e+00) > %188 = call float @llvm.fabs.f32(float %122) > %189 = fmul float %188, 0x3EF4F8B580000000 > %190 = call float @llvm.minnum.f32(float %189, float 1.000000e+00) > %191 = fsub float 1.000000e+00, %190 > %192 = fsub float -0.000000e+00, %187 > %193 = call float @llvm.fma.f32(float %192, float %191, float %186) > %194 = call float @llvm.maxnum.f32(float %193, float %64) > %195 = fcmp une float %45, 0.000000e+00 > br i1 %195, label %IF, label %ELSE > >IF: ; preds = %main_body > %196 = fdiv float 1.000000e+00, %45 > %197 = fmul float %129, %196 > %198 = fsub float -0.000000e+00, %197 > br label %ENDIF > >ELSE: ; preds = %main_body > %199 = fsub float -0.000000e+00, %129 > %200 = fcmp olt float %129, -0.000000e+00 > %201 = select i1 %200, float 1.000000e+00, float %199 > %202 = fcmp oge float %201, 0.000000e+00 > %.op = fmul float %201, 0x4600000000000000 > %203 = select i1 %202, float %.op, float 
0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %198, %IF ], [ %203, %ELSE ] > %204 = fmul float %temp44.0, 0x3FF7154760000000 > %205 = call float @llvm.exp2.f32(float %204) > %206 = fadd float %205, %52 > %207 = fmul float %206, %54 > %208 = fmul float %207, 5.000000e-01 > %209 = fmul float %169, %208 > %210 = call float @llvm.minnum.f32(float %209, float %48) > %211 = call float @llvm.maxnum.f32(float %210, float %53) > %212 = fmul float %211, %194 > %213 = fcmp une float %58, 0.000000e+00 > br i1 %213, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %214 = fdiv float 1.000000e+00, %58 > %215 = fmul float %129, %214 > %216 = fsub float -0.000000e+00, %215 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %217 = fsub float -0.000000e+00, %129 > %218 = fcmp olt float %129, -0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %217 > %220 = fcmp oge float %219, 0.000000e+00 > %.op62 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %216, %IF57 ], [ %221, %ELSE58 ] > %222 = fsub float %59, %129 > %223 = fcmp une float %46, 0.000000e+00 > br i1 %223, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %224 = fdiv float 1.000000e+00, %46 > %225 = fmul float %222, %224 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %226 = fcmp ogt float %222, 0.000000e+00 > %227 = select i1 %226, float 1.000000e+00, float %222 > %228 = fcmp oge float %227, 0.000000e+00 > %.op63 = fmul float %227, 0x4600000000000000 > %229 = select i1 %228, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %225, %IF60 ], [ %229, %ELSE61 ] > %230 = fmul float %temp44.1, 0x3FF7154760000000 > %231 = call float @llvm.exp2.f32(float %230) > %232 = fmul float %231, %55 > %233 = fmul float %231, %56 > %234 = fmul float %231, %57 > %235 = call float @llvm.fma.f32(float %55, float %231, float %211) > %236 = call float @llvm.fma.f32(float %56, float %231, float %211) > %237 = call float @llvm.fma.f32(float %57, float %231, float %211) > %238 = call float @llvm.fma.f32(float %232, float %181, float %212) > %239 = call float @llvm.fma.f32(float %233, float %181, float %212) > %240 = call float @llvm.fma.f32(float %234, float %181, float %212) > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp oeq float %236, 0.000000e+00 > %243 = fcmp oeq float %237, 0.000000e+00 > %244 = fcmp ogt float %238, 0.000000e+00 > %245 = select i1 %244, float 1.000000e+00, float %238 > %246 = fcmp oge float %245, 0.000000e+00 > %247 = fcmp ogt float %239, 0.000000e+00 > %248 = select i1 %247, float 1.000000e+00, float %239 > %249 = fcmp oge float %248, 0.000000e+00 > %250 = fcmp ogt float %240, 0.000000e+00 > %251 = select i1 %250, float 1.000000e+00, float %240 > %252 = fcmp oge float %251, 0.000000e+00 > %.op64 = fmul float %245, 0x4600000000000000 > %253 = select i1 %246, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %248, 0x4600000000000000 > %254 = select i1 %249, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %251, 0x4600000000000000 > %255 = select i1 %252, float %.op66, float 0xC600000000000000 > %256 = fdiv float 1.000000e+00, %235 > %257 = fdiv float 1.000000e+00, %236 > %258 = fdiv float 1.000000e+00, %237 > %259 = fmul float %238, %256 > %260 = fmul float %239, %257 > %261 = fmul float %240, %258 > %262 = select i1 %241, float %253, float 
%259 > %263 = select i1 %242, float %254, float %260 > %264 = select i1 %243, float %255, float %261 > %265 = fmul float %235, %temp48.0 > %266 = fmul float %236, %temp48.0 > %267 = fmul float %237, %temp48.0 > %268 = call float @llvm.fabs.f32(float %122) > %269 = call float @llvm.fabs.f32(float %122) > %270 = call float @llvm.fabs.f32(float %122) > %271 = fmul float %235, %268 > %272 = fmul float %236, %269 > %273 = fmul float %237, %270 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = fmul float %272, 0xBFF7154760000000 > %276 = fmul float %273, 0xBFF7154760000000 > %277 = call float @llvm.exp2.f32(float %274) > %278 = call float @llvm.exp2.f32(float %275) > %279 = call float @llvm.exp2.f32(float %276) > %280 = fmul float %265, 0xBFF7154760000000 > %281 = fmul float %266, 0xBFF7154760000000 > %282 = fmul float %267, 0xBFF7154760000000 > %283 = call float @llvm.log2.f32(float %61) > %284 = call float @llvm.log2.f32(float %62) > %285 = call float @llvm.log2.f32(float %63) > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = fmul float %284, 0x3FDD1745E0000000 > %288 = fmul float %285, 0x3FDD1745E0000000 > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %287) > %291 = call float @llvm.exp2.f32(float %288) > %292 = call float @llvm.exp2.f32(float %280) > %293 = call float @llvm.exp2.f32(float %281) > %294 = call float @llvm.exp2.f32(float %282) > %295 = fmul float %292, %289 > %296 = fmul float %293, %290 > %297 = fmul float %294, %291 > %298 = fmul float %262, %295 > %299 = fmul float %263, %296 > %300 = fmul float %264, %297 > %301 = fsub float 1.000000e+00, %277 > %302 = fsub float 1.000000e+00, %278 > %303 = fsub float 1.000000e+00, %279 > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.fma.f32(float %299, float %302, float 0xBF70624DE0000000) > %306 = call float @llvm.fma.f32(float %300, float %303, float 0xBF70624DE0000000) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.maxnum.f32(float %305, float 0.000000e+00) > %309 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = call float @llvm.fma.f32(float %308, float 0x4018CCCCC0000000, float 5.000000e-01) > %312 = call float @llvm.fma.f32(float %309, float 0x4018CCCCC0000000, float 5.000000e-01) > %313 = fmul float %307, %310 > %314 = fmul float %308, %311 > %315 = fmul float %309, %312 > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %308, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %318 = call float @llvm.fma.f32(float %309, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = call float @llvm.fma.f32(float %308, float %317, float 0x3FAEB851E0000000) > %321 = call float @llvm.fma.f32(float %309, float %318, float 0x3FAEB851E0000000) > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp oeq float %320, 0.000000e+00 > %324 = fcmp oeq float %321, 0.000000e+00 > %325 = fcmp ogt float %313, 0.000000e+00 > %326 = select i1 %325, float 1.000000e+00, float %313 > %327 = fcmp oge float %326, 0.000000e+00 > %328 = fcmp ogt float %314, 0.000000e+00 > %329 = select i1 %328, float 1.000000e+00, float %314 > %330 = fcmp oge float %329, 0.000000e+00 > %331 = fcmp ogt float %315, 0.000000e+00 > %332 = 
select i1 %331, float 1.000000e+00, float %315 > %333 = fcmp oge float %332, 0.000000e+00 > %.op67 = fmul float %326, 0x4600000000000000 > %334 = select i1 %327, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %329, 0x4600000000000000 > %335 = select i1 %330, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %332, 0x4600000000000000 > %336 = select i1 %333, float %.op69, float 0xC600000000000000 > %337 = fdiv float 1.000000e+00, %319 > %338 = fdiv float 1.000000e+00, %320 > %339 = fdiv float 1.000000e+00, %321 > %340 = fmul float %313, %337 > %341 = fmul float %314, %338 > %342 = fmul float %315, %339 > %343 = select i1 %322, float %334, float %340 > %344 = select i1 %323, float %335, float %341 > %345 = select i1 %324, float %336, float %342 > %346 = bitcast i32 %11 to float > %347 = insertvalue <{ float, float, float }> undef, float %346, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %133, float %134, float %104, float %122) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %138, float %139, float %140, float %71) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %343, float %344, float %345, float %277) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %94, float %95, float %96, float %97) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %116, float %104, float %122) > ret <{ float, float, float }> %347 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..35] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 240, 256, 320} >IMM[2] UINT32 {272, 304, 288, 560} >IMM[3] FLT32 { -1.0000, 0.0000, 0.0000, 
0.0000} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].wwww, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].wwww > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][16].xxxx > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xz, CONST[1][20].xxxx > 26: MOV TEMP[1].yw, IN[4].yxyy > 27: MUL TEMP[2].xy, TEMP[1].xyyy, CONST[1][17].zwww > 28: FMA TEMP[2].xy, IN[0].xyyy, CONST[1][19].yzzz, TEMP[2].xyyy > 29: MOV TEMP[2].xy, TEMP[2].xyyy > 30: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D > 31: FMA TEMP[2].xy, TEMP[2].ywww, IMM[0].wwww, IMM[3].xxxx > 32: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1][18].xxxx > 33: FMA TEMP[2].xy, TEMP[2].xyyy, IN[4].zzzz, IN[0].xyyy > 34: MUL TEMP[3].x, TEMP[2].yyyy, CONST[1][19].xxxx > 35: MOV TEMP[3].w, TEMP[3].xxxx > 36: MUL TEMP[3].xyz, TEMP[2].xyxx, CONST[1][18].yzww > 37: FMA TEMP[2].xy, CONST[1][20].xxxx, CONST[1][17].xyyy, TEMP[3].zwww > 38: FMA TEMP[1].xy, TEMP[1].zwww, CONST[1][16].zwww, TEMP[3].xyyy > 39: MOV TEMP[4].xy, TEMP[1].xyyy > 40: TEX TEMP[4], TEMP[4], SAMP[2], 2D > 41: MOV TEMP[2].xy, TEMP[2].xyyy > 42: TEX TEMP[2], TEMP[2], SAMP[3], 2D > 43: MUL TEMP[5].x, TEMP[4].wwww, TEMP[2].wwww > 44: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 45: ADD TEMP[3].xyz, -TEMP[4].xyzz, TEMP[2].xyzz > 46: FMA TEMP[1].xyz, CONST[1][19].wwww, TEMP[3].xyzz, TEMP[4].xyzz > 47: MUL TEMP[2].x, TEMP[0].xxxx, TEMP[5].xxxx > 48: MOV TEMP[1].w, TEMP[2].xxxx > 49: MOV TEMP[0].x, IMM[0].zzzz > 50: MOV TEMP[0].w, IN[4].wwww > 51: MUL TEMP[0], TEMP[0].xxxw, TEMP[1] > 52: MUL TEMP[0], TEMP[0], IN[2] > 53: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 54: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 55: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][35].xyzz > 56: MOV TEMP[0].xyz, TEMP[0].xyzx > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][16].yyyy > 58: MOV TEMP[0].w, TEMP[1].xxxx > 59: MOV OUT[0], TEMP[0] > 60: END >radeonsi: Compiling shader 314 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %26 = call float @llvm.SI.load.const(<16 x 
i8> %24, i32 256) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 560) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 564) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 568) > %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 > %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 3 > %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 > %51 = extractelement <8 x i32> %47, i32 7 > %52 = extractelement <4 x i32> %50, i32 0 > %53 = and i32 %52, %51 > %54 = insertelement <4 x i32> %50, i32 %53, i32 0 > %55 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 > %57 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %58 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %57, i64 0, i64 7 > %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 > %60 = extractelement <8 x i32> %56, i32 7 > %61 = extractelement <4 x i32> %59, i32 0 > %62 = and i32 %61, %60 > %63 = insertelement <4 x i32> %59, i32 %62, i32 0 > %64 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 > %66 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %67 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %66, i64 0, i64 11 > %68 = load <4 x i32>, <4 x i32> addrspace(2)* %67, align 16, !tbaa !0 > %69 = extractelement <8 x i32> %65, i32 7 > %70 = extractelement <4 x i32> %68, i32 0 > %71 = and i32 %70, %69 > %72 = insertelement <4 x i32> %68, i32 %71, i32 0 > %73 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, align 32, !tbaa !0 > %75 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %76 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %75, i64 0, i64 15 > %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 > %78 = extractelement <8 x i32> %74, i32 7 > %79 = extractelement <4 x i32> %77, i32 0 > %80 = and i32 %79, %78 > %81 = insertelement <4 x i32> %77, i32 %80, i32 0 > %82 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> 
%8) > %83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %96 = fcmp oeq float %86, 0.000000e+00 > %97 = fcmp oeq float %86, 0.000000e+00 > %98 = fcmp ogt float %84, 0.000000e+00 > %99 = select i1 %98, float 1.000000e+00, float %84 > %100 = fcmp oge float %99, 0.000000e+00 > %101 = fcmp ogt float %85, 0.000000e+00 > %102 = select i1 %101, float 1.000000e+00, float %85 > %103 = fcmp oge float %102, 0.000000e+00 > %.op = fmul float %99, 0x4600000000000000 > %104 = select i1 %100, float %.op, float 0xC600000000000000 > %.op24 = fmul float %102, 0x4600000000000000 > %105 = select i1 %103, float %.op24, float 0xC600000000000000 > %106 = fdiv float 1.000000e+00, %86 > %107 = fmul float %84, %106 > %108 = fmul float %85, %106 > %109 = select i1 %96, float %104, float %107 > %110 = select i1 %97, float %105, float %108 > %111 = bitcast float %109 to i32 > %112 = bitcast float %110 to i32 > %113 = insertelement <2 x i32> undef, i32 %111, i32 0 > %114 = insertelement <2 x i32> %113, i32 %112, i32 1 > %115 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %114, <8 x i32> %47, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %116 = extractelement <4 x float> %115, i32 0 > %117 = fsub float %86, %116 > %118 = fcmp une float %25, 0.000000e+00 > %119 = call float @llvm.fabs.f32(float %117) > br i1 %118, label %IF, label %ELSE > >IF: ; preds = %main_body > %120 = fdiv float 1.000000e+00, %25 > %121 = fmul float %119, %120 > br label %ENDIF > >ELSE: ; preds = %main_body > %122 = fcmp one float %117, 0.000000e+00 > %123 = select i1 %122, float 1.000000e+00, float %119 > %124 = fcmp oge float %123, 0.000000e+00 > %.op25 = fmul float %123, 0x4600000000000000 > %125 = select i1 %124, float %.op25, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %121, %IF ], [ %125, %ELSE ] > %126 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %127 = fsub float 1.000000e+00, %126 > %128 = call float @llvm.log2.f32(float %127) > %129 = fmul float %128, %26 > %130 = call float @llvm.exp2.f32(float %129) > %131 = fsub float 1.000000e+00, %130 > %132 = fmul float %42, %32 > %133 = fmul float %92, %33 > %134 = call float @llvm.fma.f32(float %82, float %39, float %132) > %135 = call float @llvm.fma.f32(float %83, float %40, float %133) > %136 = bitcast float %134 to i32 > %137 = bitcast float %135 to i32 > %138 = insertelement <2 x i32> undef, i32 %136, i32 0 > %139 = insertelement <2 x i32> %138, i32 %137, i32 1 > %140 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %139, <8 x i32> %56, <4 x i32> %63, i32 
15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 1 > %142 = extractelement <4 x float> %140, i32 3 > %143 = call float @llvm.fma.f32(float %141, float 2.000000e+00, float -1.000000e+00) > %144 = call float @llvm.fma.f32(float %142, float 2.000000e+00, float -1.000000e+00) > %145 = fmul float %143, %34 > %146 = fmul float %144, %34 > %147 = call float @llvm.fma.f32(float %145, float %94, float %82) > %148 = call float @llvm.fma.f32(float %146, float %94, float %83) > %149 = fmul float %148, %38 > %150 = fmul float %147, %35 > %151 = fmul float %148, %36 > %152 = fmul float %147, %37 > %153 = call float @llvm.fma.f32(float %42, float %30, float %152) > %154 = call float @llvm.fma.f32(float %42, float %31, float %149) > %155 = call float @llvm.fma.f32(float %42, float %28, float %150) > %156 = call float @llvm.fma.f32(float %93, float %29, float %151) > %157 = bitcast float %155 to i32 > %158 = bitcast float %156 to i32 > %159 = insertelement <2 x i32> undef, i32 %157, i32 0 > %160 = insertelement <2 x i32> %159, i32 %158, i32 1 > %161 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %160, <8 x i32> %65, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %162 = extractelement <4 x float> %161, i32 0 > %163 = extractelement <4 x float> %161, i32 1 > %164 = extractelement <4 x float> %161, i32 2 > %165 = extractelement <4 x float> %161, i32 3 > %166 = bitcast float %153 to i32 > %167 = bitcast float %154 to i32 > %168 = insertelement <2 x i32> undef, i32 %166, i32 0 > %169 = insertelement <2 x i32> %168, i32 %167, i32 1 > %170 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %169, <8 x i32> %74, <4 x i32> %81, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %171 = extractelement <4 x float> %170, i32 0 > %172 = extractelement <4 x float> %170, i32 1 > %173 = extractelement <4 x float> %170, i32 2 > %174 = extractelement <4 x float> %170, i32 3 > %175 = fmul float %165, %174 > %176 = call float @llvm.AMDGPU.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) > %177 = fsub float %171, %162 > %178 = fsub float %172, %163 > %179 = fsub float %173, %164 > %180 = call float @llvm.fma.f32(float %41, float %177, float %162) > %181 = call float @llvm.fma.f32(float %41, float %178, float %163) > %182 = call float @llvm.fma.f32(float %41, float %179, float %164) > %183 = fmul float %131, %176 > %184 = fmul float %95, %183 > %185 = fmul float %180, %87 > %186 = fmul float %181, %88 > %187 = fmul float %182, %89 > %188 = fmul float %184, %90 > %189 = fmul float %185, %91 > %190 = fmul float %186, %91 > %191 = fmul float %187, %91 > %192 = fmul float %188, %189 > %193 = fmul float %188, %190 > %194 = fmul float %188, %191 > %195 = fmul float %192, %43 > %196 = fmul float %193, %44 > %197 = fadd float %196, %195 > %198 = fmul float %194, %45 > %199 = fadd float %197, %198 > %200 = fmul float %199, %27 > %201 = bitcast float %5 to i32 > %202 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %201, 10 > %203 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %202, float %192, 11 > %204 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> 
%203, float %193, 12 > %205 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %204, float %194, 13 > %206 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %205, float %200, 14 > %207 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %206, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %207 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..34] >DCL TEMP[0..11], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 256, 272, 288} >IMM[2] UINT32 {304, 336, 432, 528} >IMM[3] UINT32 {320, 352, 464, 384} >IMM[4] UINT32 {368, 448, 400, 416} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[7] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][21], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][27].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][33].xyzz > 14: DP3 TEMP[1].x, CONST[1][20].xyzz, TEMP[5].xyzz > 15: DP3 TEMP[6].x, CONST[1][22].xyzz, TEMP[5].xyzz > 16: MOV TEMP[1].z, TEMP[6].xxxx > 17: DP3 TEMP[5].x, CONST[1][21].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].y, TEMP[5].xxxx > 19: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz > 20: RSQ TEMP[6].x, TEMP[6].xxxx > 21: MUL TEMP[7].xyz, TEMP[6].xxxx, TEMP[1].xyzz > 22: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].yyyy > 23: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 24: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 25: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 26: DP3 TEMP[6].x, -TEMP[7].xyzz, 
CONST[1][29].xyzz > 27: FMA TEMP[8].x, -CONST[1][24].yyyy, TEMP[6].xxxx, CONST[1][24].xxxx > 28: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[6].xxxx, IMM[0].xxxx > 29: MOV TEMP[0].z, TEMP[6].xxxx > 30: ABS TEMP[6].x, TEMP[8].xxxx > 31: LG2 TEMP[6].x, TEMP[6].xxxx > 32: MOV TEMP[0].w, TEMP[6].xxxx > 33: MUL TEMP[6].xy, TEMP[0].zwww, IMM[0].zwww > 34: EX2 TEMP[8].x, TEMP[6].yyyy > 35: FMA TEMP[1].x, CONST[1][24].zzzz, TEMP[8].xxxx, -CONST[1][23].zzzz > 36: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][24].zzzz > 37: MAX TEMP[9].x, TEMP[1].xxxx, IMM[5].xxxx > 38: ABS TEMP[10].x, TEMP[2].xxxx > 39: MUL TEMP[10].x, TEMP[10].xxxx, IMM[5].yyyy > 40: MIN TEMP[10].x, TEMP[10].xxxx, IMM[0].xxxx > 41: ADD TEMP[10].x, -TEMP[10].xxxx, IMM[0].xxxx > 42: FMA TEMP[8].x, -TEMP[9].xxxx, TEMP[10].xxxx, TEMP[8].xxxx > 43: MAX TEMP[8].x, TEMP[8].xxxx, CONST[1][28].wwww > 44: FSNE TEMP[9].x, CONST[1][23].xxxx, IMM[5].xxxx > 45: UIF TEMP[9].xxxx :0 > 46: RCP TEMP[9].x, CONST[1][23].xxxx > 47: MUL TEMP[9].x, -TEMP[0].xxxx, TEMP[9].xxxx > 48: ELSE :0 > 49: SSG TEMP[10].x, -TEMP[0].xxxx > 50: MUL TEMP[9].x, IMM[5].zzzz, TEMP[10].xxxx > 51: ENDIF > 52: MUL TEMP[1].x, TEMP[9].xxxx, IMM[5].wwww > 53: EX2 TEMP[9].x, TEMP[1].xxxx > 54: ADD TEMP[1].x, TEMP[9].xxxx, CONST[1][24].wwww > 55: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][25].yyyy > 56: MUL TEMP[1].x, TEMP[1].xxxx, IMM[6].xxxx > 57: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 58: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][23].wwww > 59: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][25].xxxx > 60: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx > 61: FSNE TEMP[9].x, CONST[1][26].wwww, IMM[5].xxxx > 62: UIF TEMP[9].xxxx :0 > 63: RCP TEMP[9].x, CONST[1][26].wwww > 64: MUL TEMP[9].x, -TEMP[0].xxxx, TEMP[9].xxxx > 65: ELSE :0 > 66: SSG TEMP[10].x, -TEMP[0].xxxx > 67: MUL TEMP[9].x, IMM[5].zzzz, TEMP[10].xxxx > 68: ENDIF > 69: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][27].zzzz > 70: FSNE TEMP[10].x, CONST[1][23].yyyy, IMM[5].xxxx > 71: UIF TEMP[10].xxxx :0 > 72: RCP TEMP[10].x, CONST[1][23].yyyy > 73: MUL TEMP[10].x, TEMP[0].xxxx, TEMP[10].xxxx > 74: ELSE :0 > 75: SSG TEMP[11].x, TEMP[0].xxxx > 76: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 77: ENDIF > 78: MUL TEMP[1].x, TEMP[9].xxxx, IMM[5].wwww > 79: EX2 TEMP[9].x, TEMP[1].xxxx > 80: MUL TEMP[7].xyz, TEMP[9].xxxx, CONST[1][26].xyzz > 81: FMA TEMP[5].xyz, CONST[1][26].xyzz, TEMP[9].xxxx, TEMP[5].xxxx > 82: FMA TEMP[6].xyz, TEMP[7].xyzz, TEMP[6].xxxx, TEMP[8].xxxx > 83: FSEQ TEMP[8].xyz, TEMP[5].xyzz, IMM[5].xxxx > 84: SSG TEMP[9].xyz, TEMP[6].xyzz > 85: MUL TEMP[9].xyz, IMM[5].zzzz, TEMP[9].xyzz > 86: RCP TEMP[11].x, TEMP[5].xxxx > 87: RCP TEMP[11].y, TEMP[5].yyyy > 88: RCP TEMP[11].z, TEMP[5].zzzz > 89: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[11].xyzz > 90: UCMP TEMP[6].xyz, TEMP[8].xyzz, TEMP[9].xyzz, TEMP[6].xyzz > 91: MUL TEMP[7].xyz, TEMP[10].xxxx, -TEMP[5].xyzz > 92: ABS TEMP[2].xyz, TEMP[2].xxxx > 93: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 94: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 95: EX2 TEMP[2].x, TEMP[1].xxxx > 96: EX2 TEMP[2].y, TEMP[1].yyyy > 97: EX2 TEMP[2].z, TEMP[1].zzzz > 98: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[5].wwww > 99: LG2 TEMP[5].x, CONST[1][28].xxxx >100: LG2 TEMP[5].y, CONST[1][28].yyyy >101: LG2 TEMP[5].z, CONST[1][28].zzzz >102: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[6].yyyy >103: EX2 TEMP[8].x, TEMP[5].xxxx >104: EX2 TEMP[8].y, TEMP[5].yyyy >105: EX2 TEMP[8].z, TEMP[5].zzzz >106: EX2 TEMP[5].x, TEMP[7].xxxx >107: EX2 TEMP[5].y, TEMP[7].yyyy >108: EX2 TEMP[5].z, TEMP[7].zzzz >109: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[8].xyzz 
>110: MUL TEMP[0].xyz, TEMP[6].xyzz, TEMP[7].xyzz >111: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >112: MOV TEMP[2].w, TEMP[2].xxxx >113: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[6].zzzz >114: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >115: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[6].wwww, IMM[6].xxxx >116: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >117: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[6].wwww, IMM[7].xxxx >118: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].yyyy >119: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >120: SSG TEMP[6].xyz, TEMP[1].xyzz >121: MUL TEMP[6].xyz, IMM[5].zzzz, TEMP[6].xyzz >122: RCP TEMP[7].x, TEMP[0].xxxx >123: RCP TEMP[7].y, TEMP[0].yyyy >124: RCP TEMP[7].z, TEMP[0].zzzz >125: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >126: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[6].xyzz, TEMP[0].xyzz >127: MOV OUT[3], IN[2] >128: MOV OUT[2], TEMP[2] >129: MOV OUT[1], TEMP[4] >130: MOV OUT[0], TEMP[3] >131: END >radeonsi: Compiling shader 315 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 268) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > 
%49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 > %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %13) > %73 = extractelement <4 x float> %72, i32 0 > %74 = extractelement <4 x float> %72, i32 1 > %75 = extractelement <4 x float> %72, i32 2 > %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 > %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %14) > %79 = extractelement <4 x float> %78, i32 0 > %80 = extractelement <4 x float> %78, i32 1 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %15) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = extractelement <4 x float> %83, i32 3 > %88 = fmul float %18, %73 > %89 = fmul float %19, %74 > %90 = fadd float %88, %89 > %91 = fmul float %20, %75 > %92 = fadd float %90, %91 > %93 = fadd float %92, %21 > %94 = fmul float %22, %73 > %95 = fmul float %23, %74 > %96 = fadd float %94, %95 > %97 = fmul float %24, %75 > %98 = fadd float %96, %97 > %99 = fadd float %98, %25 > %100 = fmul float %26, %73 > %101 = fmul float %27, %74 > %102 = fadd float %100, %101 > %103 = fmul float %28, %75 > %104 = fadd float %102, %103 > %105 = fadd float %104, %29 > %106 = fmul float %30, %73 > %107 = fmul float %31, %74 > %108 = fadd float %106, %107 > %109 = fmul float %32, %75 > %110 = fadd float %108, %109 > %111 = fadd float %110, %33 > %112 = fmul float %37, %73 > %113 = fmul float %38, %74 > %114 = fadd float %112, %113 > %115 = fmul float %39, %75 > %116 = fadd float %114, %115 > %117 = fadd float %116, %40 > %118 = fadd float %117, %59 > %119 = fsub float %67, %73 > %120 = fsub float %68, %74 > %121 = fsub float %69, %75 > %122 = fmul float %34, %119 > %123 = fmul float %35, %120 > %124 = fadd float %123, %122 > %125 = fmul float 
%36, %121 > %126 = fadd float %124, %125 > %127 = fmul float %41, %119 > %128 = fmul float %42, %120 > %129 = fadd float %128, %127 > %130 = fmul float %43, %121 > %131 = fadd float %129, %130 > %132 = fmul float %37, %119 > %133 = fmul float %38, %120 > %134 = fadd float %133, %132 > %135 = fmul float %39, %121 > %136 = fadd float %134, %135 > %137 = fmul float %126, %126 > %138 = fmul float %136, %136 > %139 = fadd float %138, %137 > %140 = fmul float %131, %131 > %141 = fadd float %139, %140 > %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) > %143 = fmul float %142, %126 > %144 = fmul float %142, %136 > %145 = fmul float %142, %131 > %146 = fsub float -0.000000e+00, %136 > %147 = call float @llvm.fma.f32(float %146, float %142, float 0xBFC3333340000000) > %148 = fsub float 1.000000e+00, %147 > %149 = call float @llvm.AMDGPU.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) > %150 = fmul float %149, %149 > %151 = fmul float %143, %64 > %152 = fsub float -0.000000e+00, %151 > %153 = fmul float %144, %65 > %154 = fsub float %152, %153 > %155 = fmul float %145, %66 > %156 = fsub float %154, %155 > %157 = fsub float -0.000000e+00, %49 > %158 = call float @llvm.fma.f32(float %157, float %156, float %48) > %159 = call float @llvm.fma.f32(float %156, float %156, float 1.000000e+00) > %160 = call float @llvm.fabs.f32(float %158) > %161 = call float @llvm.log2.f32(float %160) > %162 = fmul float %159, 0x3FAE8EC8A0000000 > %163 = fmul float %161, -1.500000e+00 > %164 = call float @llvm.exp2.f32(float %163) > %165 = fsub float -0.000000e+00, %46 > %166 = call float @llvm.fma.f32(float %50, float %164, float %165) > %167 = fmul float %164, %50 > %168 = call float @llvm.maxnum.f32(float %166, float 0.000000e+00) > %169 = call float @llvm.fabs.f32(float %111) > %170 = fmul float %169, 0x3EF4F8B580000000 > %171 = call float @llvm.minnum.f32(float %170, float 1.000000e+00) > %172 = fsub float 1.000000e+00, %171 > %173 = fsub float -0.000000e+00, %168 > %174 = call float @llvm.fma.f32(float %173, float %172, float %167) > %175 = call float @llvm.maxnum.f32(float %174, float %63) > %176 = fcmp une float %44, 0.000000e+00 > br i1 %176, label %IF, label %ELSE > >IF: ; preds = %main_body > %177 = fdiv float 1.000000e+00, %44 > %178 = fmul float %118, %177 > %179 = fsub float -0.000000e+00, %178 > br label %ENDIF > >ELSE: ; preds = %main_body > %180 = fsub float -0.000000e+00, %118 > %181 = fcmp olt float %118, -0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %180 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %182, 0x4600000000000000 > %184 = select i1 %183, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp36.0 = phi float [ %179, %IF ], [ %184, %ELSE ] > %185 = fmul float %temp36.0, 0x3FF7154760000000 > %186 = call float @llvm.exp2.f32(float %185) > %187 = fadd float %186, %51 > %188 = fmul float %187, %53 > %189 = fmul float %188, 5.000000e-01 > %190 = fmul float %150, %189 > %191 = call float @llvm.minnum.f32(float %190, float %47) > %192 = call float @llvm.maxnum.f32(float %191, float %52) > %193 = fmul float %192, %175 > %194 = fcmp une float %57, 0.000000e+00 > br i1 %194, label %IF49, label %ELSE50 > >IF49: ; preds = %ENDIF > %195 = fdiv float 1.000000e+00, %57 > %196 = fmul float %118, %195 > %197 = fsub float -0.000000e+00, %196 > br label %ENDIF48 > >ELSE50: ; preds = %ENDIF > %198 = fsub float -0.000000e+00, %118 > %199 = fcmp olt float %118, -0.000000e+00 > %200 = select i1 %199, float 1.000000e+00, 
float %198 > %201 = fcmp oge float %200, 0.000000e+00 > %.op54 = fmul float %200, 0x4600000000000000 > %202 = select i1 %201, float %.op54, float 0xC600000000000000 > br label %ENDIF48 > >ENDIF48: ; preds = %ELSE50, %IF49 > %temp36.1 = phi float [ %197, %IF49 ], [ %202, %ELSE50 ] > %203 = fsub float %58, %118 > %204 = fcmp une float %45, 0.000000e+00 > br i1 %204, label %IF52, label %ELSE53 > >IF52: ; preds = %ENDIF48 > %205 = fdiv float 1.000000e+00, %45 > %206 = fmul float %203, %205 > br label %ENDIF51 > >ELSE53: ; preds = %ENDIF48 > %207 = fcmp ogt float %203, 0.000000e+00 > %208 = select i1 %207, float 1.000000e+00, float %203 > %209 = fcmp oge float %208, 0.000000e+00 > %.op55 = fmul float %208, 0x4600000000000000 > %210 = select i1 %209, float %.op55, float 0xC600000000000000 > br label %ENDIF51 > >ENDIF51: ; preds = %ELSE53, %IF52 > %temp40.0 = phi float [ %206, %IF52 ], [ %210, %ELSE53 ] > %211 = fmul float %temp36.1, 0x3FF7154760000000 > %212 = call float @llvm.exp2.f32(float %211) > %213 = fmul float %212, %54 > %214 = fmul float %212, %55 > %215 = fmul float %212, %56 > %216 = call float @llvm.fma.f32(float %54, float %212, float %192) > %217 = call float @llvm.fma.f32(float %55, float %212, float %192) > %218 = call float @llvm.fma.f32(float %56, float %212, float %192) > %219 = call float @llvm.fma.f32(float %213, float %162, float %193) > %220 = call float @llvm.fma.f32(float %214, float %162, float %193) > %221 = call float @llvm.fma.f32(float %215, float %162, float %193) > %222 = fcmp oeq float %216, 0.000000e+00 > %223 = fcmp oeq float %217, 0.000000e+00 > %224 = fcmp oeq float %218, 0.000000e+00 > %225 = fcmp ogt float %219, 0.000000e+00 > %226 = select i1 %225, float 1.000000e+00, float %219 > %227 = fcmp oge float %226, 0.000000e+00 > %228 = fcmp ogt float %220, 0.000000e+00 > %229 = select i1 %228, float 1.000000e+00, float %220 > %230 = fcmp oge float %229, 0.000000e+00 > %231 = fcmp ogt float %221, 0.000000e+00 > %232 = select i1 %231, float 1.000000e+00, float %221 > %233 = fcmp oge float %232, 0.000000e+00 > %.op56 = fmul float %226, 0x4600000000000000 > %234 = select i1 %227, float %.op56, float 0xC600000000000000 > %.op57 = fmul float %229, 0x4600000000000000 > %235 = select i1 %230, float %.op57, float 0xC600000000000000 > %.op58 = fmul float %232, 0x4600000000000000 > %236 = select i1 %233, float %.op58, float 0xC600000000000000 > %237 = fdiv float 1.000000e+00, %216 > %238 = fdiv float 1.000000e+00, %217 > %239 = fdiv float 1.000000e+00, %218 > %240 = fmul float %219, %237 > %241 = fmul float %220, %238 > %242 = fmul float %221, %239 > %243 = select i1 %222, float %234, float %240 > %244 = select i1 %223, float %235, float %241 > %245 = select i1 %224, float %236, float %242 > %246 = fmul float %216, %temp40.0 > %247 = fmul float %217, %temp40.0 > %248 = fmul float %218, %temp40.0 > %249 = call float @llvm.fabs.f32(float %111) > %250 = call float @llvm.fabs.f32(float %111) > %251 = call float @llvm.fabs.f32(float %111) > %252 = fmul float %216, %249 > %253 = fmul float %217, %250 > %254 = fmul float %218, %251 > %255 = fmul float %252, 0xBFF7154760000000 > %256 = fmul float %253, 0xBFF7154760000000 > %257 = fmul float %254, 0xBFF7154760000000 > %258 = call float @llvm.exp2.f32(float %255) > %259 = call float @llvm.exp2.f32(float %256) > %260 = call float @llvm.exp2.f32(float %257) > %261 = fmul float %246, 0xBFF7154760000000 > %262 = fmul float %247, 0xBFF7154760000000 > %263 = fmul float %248, 0xBFF7154760000000 > %264 = call float @llvm.log2.f32(float %60) 
> %265 = call float @llvm.log2.f32(float %61) > %266 = call float @llvm.log2.f32(float %62) > %267 = fmul float %264, 0x3FDD1745E0000000 > %268 = fmul float %265, 0x3FDD1745E0000000 > %269 = fmul float %266, 0x3FDD1745E0000000 > %270 = call float @llvm.exp2.f32(float %267) > %271 = call float @llvm.exp2.f32(float %268) > %272 = call float @llvm.exp2.f32(float %269) > %273 = call float @llvm.exp2.f32(float %261) > %274 = call float @llvm.exp2.f32(float %262) > %275 = call float @llvm.exp2.f32(float %263) > %276 = fmul float %273, %270 > %277 = fmul float %274, %271 > %278 = fmul float %275, %272 > %279 = fmul float %243, %276 > %280 = fmul float %244, %277 > %281 = fmul float %245, %278 > %282 = fsub float 1.000000e+00, %258 > %283 = fsub float 1.000000e+00, %259 > %284 = fsub float 1.000000e+00, %260 > %285 = call float @llvm.fma.f32(float %279, float %282, float 0xBF70624DE0000000) > %286 = call float @llvm.fma.f32(float %280, float %283, float 0xBF70624DE0000000) > %287 = call float @llvm.fma.f32(float %281, float %284, float 0xBF70624DE0000000) > %288 = call float @llvm.maxnum.f32(float %285, float 0.000000e+00) > %289 = call float @llvm.maxnum.f32(float %286, float 0.000000e+00) > %290 = call float @llvm.maxnum.f32(float %287, float 0.000000e+00) > %291 = call float @llvm.fma.f32(float %288, float 0x4018CCCCC0000000, float 5.000000e-01) > %292 = call float @llvm.fma.f32(float %289, float 0x4018CCCCC0000000, float 5.000000e-01) > %293 = call float @llvm.fma.f32(float %290, float 0x4018CCCCC0000000, float 5.000000e-01) > %294 = fmul float %288, %291 > %295 = fmul float %289, %292 > %296 = fmul float %290, %293 > %297 = call float @llvm.fma.f32(float %288, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %298 = call float @llvm.fma.f32(float %289, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %299 = call float @llvm.fma.f32(float %290, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %300 = call float @llvm.fma.f32(float %288, float %297, float 0x3FAEB851E0000000) > %301 = call float @llvm.fma.f32(float %289, float %298, float 0x3FAEB851E0000000) > %302 = call float @llvm.fma.f32(float %290, float %299, float 0x3FAEB851E0000000) > %303 = fcmp oeq float %300, 0.000000e+00 > %304 = fcmp oeq float %301, 0.000000e+00 > %305 = fcmp oeq float %302, 0.000000e+00 > %306 = fcmp ogt float %294, 0.000000e+00 > %307 = select i1 %306, float 1.000000e+00, float %294 > %308 = fcmp oge float %307, 0.000000e+00 > %309 = fcmp ogt float %295, 0.000000e+00 > %310 = select i1 %309, float 1.000000e+00, float %295 > %311 = fcmp oge float %310, 0.000000e+00 > %312 = fcmp ogt float %296, 0.000000e+00 > %313 = select i1 %312, float 1.000000e+00, float %296 > %314 = fcmp oge float %313, 0.000000e+00 > %.op59 = fmul float %307, 0x4600000000000000 > %315 = select i1 %308, float %.op59, float 0xC600000000000000 > %.op60 = fmul float %310, 0x4600000000000000 > %316 = select i1 %311, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %313, 0x4600000000000000 > %317 = select i1 %314, float %.op61, float 0xC600000000000000 > %318 = fdiv float 1.000000e+00, %300 > %319 = fdiv float 1.000000e+00, %301 > %320 = fdiv float 1.000000e+00, %302 > %321 = fmul float %294, %318 > %322 = fmul float %295, %319 > %323 = fmul float %296, %320 > %324 = select i1 %303, float %315, float %321 > %325 = select i1 %304, float %316, float %322 > %326 = select i1 %305, float %317, float %323 > %327 = bitcast i32 %11 to float > %328 = insertvalue <{ float, float, float }> undef, float %327, 2 > call void 
@llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %80, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %324, float %325, float %326, float %258) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %84, float %85, float %86, float %87) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %93, float %99, float %105, float %111) > ret <{ float, float, float }> %328 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..32] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 512, 480, 240} >IMM[1] FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[2] > 3: FMA TEMP[1].x, TEMP[0].wwww, CONST[1][32].wwww, IMM[1].xxxx > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][32].xyzz > 5: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx > 6: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy > 7: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 8: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].wwww > 9: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][30].xyzz > 10: MOV TEMP[0].xyz, TEMP[0].xyzx > 11: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].zzzz > 12: MOV TEMP[0].w, TEMP[1].xxxx > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 316 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, 
<16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 484) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 488) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 512) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 516) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 520) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 524) > %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !tbaa !0 > %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 3 > %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 > %38 = extractelement <8 x i32> %34, i32 7 > %39 = extractelement <4 x i32> %37, i32 0 > %40 = and i32 %39, %38 > %41 = insertelement <4 x i32> %37, i32 %40, i32 0 > %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %49 = bitcast float %42 to i32 > %50 = bitcast float %43 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %34, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = call float @llvm.fma.f32(float %61, float %32, float -5.000000e-01) > %63 = fmul float %58, %29 > %64 = fmul float %59, %30 > %65 = fmul float %60, %31 > %66 = fadd float %62, %62 > %67 = call float @llvm.maxnum.f32(float %66, float 0.000000e+00) > %68 = fmul float %67, %63 > %69 = fmul float %67, %64 > %70 = fmul float %67, %65 > %71 = fmul float %68, %44 > %72 = fmul float %69, %44 > %73 = fmul float %70, %44 > %74 = fmul float %71, %26 > %75 = fmul float %72, %27 > %76 = fadd float %75, %74 > %77 = fmul float %73, %28 > %78 = fadd float %76, %77 > %79 = fmul float %78, %25 > %80 = bitcast float %5 to i32 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %80, 10 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %71, 11 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %72, 12 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %73, 13 > %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84, float %79, 14 > %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..34] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 256} >IMM[3] UINT32 {272, 288, 304, 528} >IMM[4] UINT32 {336, 432, 544, 320} >IMM[5] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[6] UINT32 {352, 464, 384, 368} >IMM[7] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[8] UINT32 {448, 400, 416, 0} >IMM[9] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[10] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, 
IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, 
TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy 
>187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD 
TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, 
TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][16], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][17], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][18], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][19], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][33].xyzz >363: DP4 TEMP[5].x, CONST[1][21], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][27].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: ABS TEMP[8].x, TEMP[2].xxxx >368: MUL TEMP[0].x, TEMP[8].xxxx, IMM[5].xxxx >369: MIN TEMP[8].x, TEMP[0].xxxx, IMM[0].zzzz >370: ADD TEMP[0].x, -TEMP[8].xxxx, IMM[0].zzzz >371: DP3 TEMP[7].x, CONST[1][20].xyzz, TEMP[3].xyzz >372: DP3 TEMP[8].x, CONST[1][22].xyzz, TEMP[3].xyzz >373: MOV TEMP[7].z, TEMP[8].xxxx >374: DP3 TEMP[3].x, CONST[1][21].xyzz, TEMP[3].xyzz >375: MOV TEMP[7].y, TEMP[3].xxxx >376: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz >377: RSQ TEMP[8].x, TEMP[8].xxxx >378: MUL TEMP[9].xyz, TEMP[8].xxxx, TEMP[7].xyzz >379: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[8].xxxx, IMM[5].yyyy >380: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >381: MOV_SAT TEMP[3].x, TEMP[3].xxxx >382: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >383: DP3 TEMP[8].x, -TEMP[9].xyzz, CONST[1][29].xyzz >384: FMA TEMP[9].x, -CONST[1][24].yyyy, TEMP[8].xxxx, CONST[1][24].xxxx >385: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx, IMM[0].zzzz >386: MUL TEMP[8].x, TEMP[8].xxxx, IMM[5].zzzz >387: ABS TEMP[9].x, TEMP[9].xxxx >388: LG2 TEMP[9].x, TEMP[9].xxxx >389: MUL TEMP[9].x, TEMP[9].xxxx, IMM[5].wwww >390: EX2 TEMP[9].x, TEMP[9].xxxx >391: FMA TEMP[10].x, CONST[1][24].zzzz, TEMP[9].xxxx, -CONST[1][23].zzzz >392: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][24].zzzz >393: MAX TEMP[10].x, TEMP[10].xxxx, IMM[7].xxxx >394: FMA TEMP[0].x, -TEMP[10].xxxx, TEMP[0].xxxx, TEMP[9].xxxx >395: MAX TEMP[9].x, TEMP[0].xxxx, CONST[1][28].wwww >396: FSNE TEMP[10].x, CONST[1][23].xxxx, IMM[7].xxxx >397: UIF TEMP[10].xxxx :0 >398: RCP TEMP[10].x, CONST[1][23].xxxx >399: MUL TEMP[10].x, -TEMP[1].xxxx, TEMP[10].xxxx >400: ELSE :0 >401: SSG TEMP[11].x, -TEMP[1].xxxx >402: MUL TEMP[10].x, IMM[7].yyyy, TEMP[11].xxxx >403: ENDIF >404: MUL TEMP[10].x, TEMP[10].xxxx, IMM[7].zzzz >405: EX2 TEMP[10].x, TEMP[10].xxxx >406: ADD TEMP[10].x, TEMP[10].xxxx, CONST[1][24].wwww >407: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][25].yyyy >408: MUL TEMP[10].x, TEMP[10].xxxx, IMM[7].wwww >409: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[10].xxxx >410: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][23].wwww >411: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][25].xxxx >412: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[9].xxxx >413: FSNE TEMP[9].x, CONST[1][26].wwww, IMM[7].xxxx >414: UIF TEMP[9].xxxx :0 >415: RCP TEMP[9].x, CONST[1][26].wwww >416: MUL TEMP[9].x, -TEMP[1].xxxx, TEMP[9].xxxx >417: ELSE :0 >418: SSG TEMP[10].x, -TEMP[1].xxxx >419: MUL TEMP[9].x, IMM[7].yyyy, TEMP[10].xxxx >420: ENDIF >421: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][27].zzzz >422: FSNE TEMP[10].x, CONST[1][23].yyyy, IMM[7].xxxx >423: UIF TEMP[10].xxxx :0 >424: RCP TEMP[10].x, CONST[1][23].yyyy >425: MUL TEMP[10].x, TEMP[1].xxxx, TEMP[10].xxxx >426: ELSE :0 >427: SSG TEMP[11].x, TEMP[1].xxxx >428: MUL TEMP[10].x, IMM[7].yyyy, TEMP[11].xxxx >429: ENDIF >430: MUL TEMP[9].x, 
TEMP[9].xxxx, IMM[7].zzzz >431: EX2 TEMP[9].x, TEMP[9].xxxx >432: MUL TEMP[7].xyz, TEMP[9].xxxx, CONST[1][26].xyzz >433: FMA TEMP[3].xyz, CONST[1][26].xyzz, TEMP[9].xxxx, TEMP[3].xxxx >434: FMA TEMP[8].xyz, TEMP[7].xyzz, TEMP[8].xxxx, TEMP[0].xxxx >435: MUL TEMP[7].xyz, TEMP[10].xxxx, -TEMP[3].xyzz >436: ABS TEMP[2].xyz, TEMP[2].xxxx >437: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >438: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].zzzz >439: EX2 TEMP[2].x, TEMP[1].xxxx >440: EX2 TEMP[2].y, TEMP[1].yyyy >441: EX2 TEMP[2].z, TEMP[1].zzzz >442: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >443: LG2 TEMP[9].x, CONST[1][28].xxxx >444: LG2 TEMP[9].y, CONST[1][28].yyyy >445: LG2 TEMP[9].z, CONST[1][28].zzzz >446: MUL TEMP[4].xyz, TEMP[9].xyzz, IMM[9].xxxx >447: EX2 TEMP[9].x, TEMP[4].xxxx >448: EX2 TEMP[9].y, TEMP[4].yyyy >449: EX2 TEMP[9].z, TEMP[4].zzzz >450: EX2 TEMP[4].x, TEMP[7].xxxx >451: EX2 TEMP[4].y, TEMP[7].yyyy >452: EX2 TEMP[4].z, TEMP[7].zzzz >453: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[9].xyzz >454: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[7].xxxx >455: SSG TEMP[9].xyz, TEMP[8].xyzz >456: MUL TEMP[9].xyz, IMM[7].yyyy, TEMP[9].xyzz >457: RCP TEMP[10].x, TEMP[3].xxxx >458: RCP TEMP[10].y, TEMP[3].yyyy >459: RCP TEMP[10].z, TEMP[3].zzzz >460: MUL TEMP[3].xyz, TEMP[8].xyzz, TEMP[10].xyzz >461: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[9].xyzz, TEMP[3].xyzz >462: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >463: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >464: MOV TEMP[2].w, TEMP[2].xxxx >465: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[9].yyyy >466: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[7].xxxx >467: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[9].zzzz, IMM[7].wwww >468: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[3].xyzz >469: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[9].zzzz, IMM[9].wwww >470: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[10].xxxx >471: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[7].xxxx >472: SSG TEMP[4].xyz, TEMP[1].xyzz >473: MUL TEMP[4].xyz, IMM[7].yyyy, TEMP[4].xyzz >474: RCP TEMP[7].x, TEMP[0].xxxx >475: RCP TEMP[7].y, TEMP[0].yyyy >476: RCP TEMP[7].z, TEMP[0].zzzz >477: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >478: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >479: MOV OUT[3], IN[2] >480: MOV OUT[2], TEMP[2] >481: MOV OUT[1], TEMP[6] >482: MOV OUT[0], TEMP[5] >483: END >radeonsi: Compiling shader 317 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 256) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 260) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 264) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 268) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 272) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 276) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 280) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 284) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 288) > %29 = call 
float @llvm.SI.load.const(<16 x i8> %19, i32 292) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 296) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 300) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 304) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 312) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 316) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 348) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 396) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 428) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 472) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) > %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 > %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 > %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %13) > %77 = extractelement <4 x float> %76, i32 0 > %78 = extractelement <4 x float> %76, i32 1 > %79 = extractelement <4 x float> %76, i32 2 > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %14) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> 
%82, i32 1 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %15) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = extractelement <4 x float> %87, i32 2 > %91 = extractelement <4 x float> %87, i32 3 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %16) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = extractelement <4 x float> %94, i32 2 > %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 > %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %17) > %101 = extractelement <4 x float> %100, i32 0 > %102 = extractelement <4 x float> %100, i32 1 > %103 = extractelement <4 x float> %100, i32 2 > %104 = fmul float %103, 0x406FE01000000000 > %105 = fmul float %102, 0x406FE01000000000 > %106 = fmul float %101, 0x406FE01000000000 > %107 = fptosi float %104 to i32 > %108 = fptosi float %105 to i32 > %109 = fptosi float %106 to i32 > %110 = shl i32 %107, 1 > %111 = or i32 %110, 1 > %112 = shl i32 %108, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %109, 1 > %115 = or i32 %114, 1 > %116 = shl i32 %107, 5 > %117 = or i32 %116, 4 > %118 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %117) > %119 = fmul float %95, %118 > %120 = shl i32 %108, 5 > %121 = or i32 %120, 4 > %122 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %121) > %123 = fmul float %96, %122 > %124 = shl i32 %111, 4 > %125 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %124) > %126 = shl i32 %111, 4 > %127 = or i32 %126, 12 > %128 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %127) > %129 = fmul float %125, %128 > %130 = shl i32 %111, 4 > %131 = or i32 %130, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %131) > %133 = shl i32 %111, 4 > %134 = or i32 %133, 8 > %135 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %134) > %136 = fsub float -0.000000e+00, %129 > %137 = call float @llvm.fma.f32(float %132, float %135, float %136) > %138 = shl i32 %111, 4 > %139 = or i32 %138, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %139) > %141 = shl i32 %111, 4 > %142 = or i32 %141, 8 > %143 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %142) > %144 = call float @llvm.fma.f32(float %140, float %143, float %129) > %145 = fmul float %144, %95 > %146 = fmul float %137, %95 > %147 = fmul float %146, 2.000000e+00 > %148 = shl i32 %113, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %148) > %150 = shl i32 %113, 4 > %151 = or i32 %150, 12 > %152 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %151) > %153 = fmul float %149, %152 > %154 = shl i32 %113, 4 > %155 = or i32 %154, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %155) > %157 = shl i32 %113, 4 > %158 = or i32 %157, 8 > %159 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %158) > %160 = fsub float -0.000000e+00, %153 > %161 = call float @llvm.fma.f32(float %156, float %159, float %160) > %162 = shl i32 %113, 4 > %163 = or i32 %162, 4 > %164 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %163) > %165 = shl i32 %113, 4 > 
%166 = or i32 %165, 8 > %167 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %166) > %168 = call float @llvm.fma.f32(float %164, float %167, float %153) > %169 = fmul float %168, %96 > %170 = fmul float %169, 2.000000e+00 > %171 = fmul float %161, %96 > %172 = fmul float %171, 2.000000e+00 > %173 = shl i32 %111, 4 > %174 = or i32 %173, 4 > %175 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %174) > %176 = shl i32 %111, 4 > %177 = or i32 %176, 8 > %178 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %177) > %179 = shl i32 %111, 4 > %180 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %179) > %181 = shl i32 %111, 4 > %182 = or i32 %181, 12 > %183 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %182) > %184 = fmul float %178, %183 > %185 = fmul float %178, %180 > %186 = fmul float %175, %183 > %187 = shl i32 %111, 4 > %188 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %187) > %189 = shl i32 %111, 4 > %190 = or i32 %189, 4 > %191 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %190) > %192 = call float @llvm.fma.f32(float %188, float %191, float %184) > %193 = fmul float %192, %95 > %194 = fmul float %193, 2.000000e+00 > %195 = shl i32 %111, 4 > %196 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %195) > %197 = shl i32 %111, 4 > %198 = or i32 %197, 4 > %199 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %198) > %200 = shl i32 %111, 4 > %201 = or i32 %200, 8 > %202 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %201) > %203 = shl i32 %111, 4 > %204 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %203) > %205 = shl i32 %111, 4 > %206 = or i32 %205, 4 > %207 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %206) > %208 = shl i32 %111, 4 > %209 = or i32 %208, 8 > %210 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %209) > %211 = fmul float %196, %204 > %212 = fmul float %199, %207 > %213 = fmul float %202, %210 > %214 = fadd float %213, %212 > %215 = fadd float %213, %211 > %216 = fadd float %212, %211 > %217 = fsub float -0.000000e+00, %214 > %218 = call float @llvm.fma.f32(float %217, float 2.000000e+00, float 1.000000e+00) > %219 = fsub float -0.000000e+00, %215 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fsub float -0.000000e+00, %216 > %222 = call float @llvm.fma.f32(float %221, float 2.000000e+00, float 1.000000e+00) > %223 = fmul float %95, %220 > %224 = shl i32 %113, 4 > %225 = or i32 %224, 4 > %226 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %225) > %227 = shl i32 %113, 4 > %228 = or i32 %227, 8 > %229 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %228) > %230 = shl i32 %113, 4 > %231 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %230) > %232 = shl i32 %113, 4 > %233 = or i32 %232, 12 > %234 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %233) > %235 = fmul float %229, %234 > %236 = fmul float %229, %231 > %237 = fmul float %226, %234 > %238 = shl i32 %113, 4 > %239 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %238) > %240 = shl i32 %113, 4 > %241 = or i32 %240, 4 > %242 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %241) > %243 = call float @llvm.fma.f32(float %239, float %242, float %235) > %244 = fmul float %243, %96 > %245 = fmul float %244, 2.000000e+00 > %246 = shl i32 %113, 4 > %247 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %246) > %248 = shl i32 %113, 4 > %249 = or i32 %248, 4 > %250 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %249) > %251 = shl i32 %113, 4 > %252 = or i32 %251, 8 > %253 = call float 
@llvm.SI.load.const(<16 x i8> %73, i32 %252) > %254 = shl i32 %113, 4 > %255 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %254) > %256 = shl i32 %113, 4 > %257 = or i32 %256, 4 > %258 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %257) > %259 = shl i32 %113, 4 > %260 = or i32 %259, 8 > %261 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %260) > %262 = fmul float %247, %255 > %263 = fmul float %250, %258 > %264 = fmul float %253, %261 > %265 = fadd float %264, %263 > %266 = fadd float %264, %262 > %267 = fadd float %263, %262 > %268 = fsub float -0.000000e+00, %265 > %269 = call float @llvm.fma.f32(float %268, float 2.000000e+00, float 1.000000e+00) > %270 = fsub float -0.000000e+00, %266 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fsub float -0.000000e+00, %267 > %273 = call float @llvm.fma.f32(float %272, float 2.000000e+00, float 1.000000e+00) > %274 = fmul float %96, %271 > %275 = fadd float %194, %245 > %276 = fadd float %223, %274 > %277 = fadd float %147, %172 > %278 = fadd float %119, %123 > %279 = shl i32 %109, 5 > %280 = or i32 %279, 4 > %281 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %280) > %282 = fmul float %97, %281 > %283 = shl i32 %115, 4 > %284 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %283) > %285 = shl i32 %115, 4 > %286 = or i32 %285, 12 > %287 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %286) > %288 = fmul float %284, %287 > %289 = shl i32 %115, 4 > %290 = or i32 %289, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %290) > %292 = shl i32 %115, 4 > %293 = or i32 %292, 8 > %294 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %293) > %295 = fsub float -0.000000e+00, %288 > %296 = call float @llvm.fma.f32(float %291, float %294, float %295) > %297 = shl i32 %115, 4 > %298 = or i32 %297, 4 > %299 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %298) > %300 = shl i32 %115, 4 > %301 = or i32 %300, 8 > %302 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %301) > %303 = call float @llvm.fma.f32(float %299, float %302, float %288) > %304 = fmul float %303, %97 > %305 = fmul float %304, 2.000000e+00 > %306 = fmul float %296, %97 > %307 = fmul float %306, 2.000000e+00 > %308 = shl i32 %115, 4 > %309 = or i32 %308, 4 > %310 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %309) > %311 = shl i32 %115, 4 > %312 = or i32 %311, 8 > %313 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %312) > %314 = shl i32 %115, 4 > %315 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %314) > %316 = shl i32 %115, 4 > %317 = or i32 %316, 12 > %318 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %317) > %319 = fmul float %313, %318 > %320 = fmul float %313, %315 > %321 = fmul float %310, %318 > %322 = shl i32 %115, 4 > %323 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %322) > %324 = shl i32 %115, 4 > %325 = or i32 %324, 4 > %326 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %325) > %327 = call float @llvm.fma.f32(float %323, float %326, float %319) > %328 = fmul float %327, %97 > %329 = fmul float %328, 2.000000e+00 > %330 = shl i32 %115, 4 > %331 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %330) > %332 = shl i32 %115, 4 > %333 = or i32 %332, 4 > %334 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %333) > %335 = shl i32 %115, 4 > %336 = or i32 %335, 8 > %337 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %336) > %338 = shl i32 %115, 4 > %339 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %338) > %340 = shl i32 %115, 4 > %341 = or i32 
%340, 4 > %342 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %341) > %343 = shl i32 %115, 4 > %344 = or i32 %343, 8 > %345 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %344) > %346 = fmul float %331, %339 > %347 = fmul float %334, %342 > %348 = fmul float %337, %345 > %349 = fadd float %348, %347 > %350 = fadd float %348, %346 > %351 = fadd float %347, %346 > %352 = fsub float -0.000000e+00, %349 > %353 = call float @llvm.fma.f32(float %352, float 2.000000e+00, float 1.000000e+00) > %354 = fsub float -0.000000e+00, %350 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fsub float -0.000000e+00, %351 > %357 = call float @llvm.fma.f32(float %356, float 2.000000e+00, float 1.000000e+00) > %358 = fmul float %97, %355 > %359 = fadd float %275, %329 > %360 = fadd float %276, %358 > %361 = fadd float %277, %307 > %362 = fadd float %278, %282 > %363 = fmul float %359, %77 > %364 = fmul float %360, %78 > %365 = fadd float %363, %364 > %366 = fmul float %361, %79 > %367 = fadd float %365, %366 > %368 = fadd float %367, %362 > %369 = shl i32 %111, 4 > %370 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %369) > %371 = shl i32 %111, 4 > %372 = or i32 %371, 8 > %373 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %372) > %374 = fsub float -0.000000e+00, %186 > %375 = call float @llvm.fma.f32(float %370, float %373, float %374) > %376 = fmul float %375, %95 > %377 = fmul float %376, 2.000000e+00 > %378 = fmul float %145, 2.000000e+00 > %379 = shl i32 %113, 4 > %380 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %379) > %381 = shl i32 %113, 4 > %382 = or i32 %381, 8 > %383 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %382) > %384 = fsub float -0.000000e+00, %237 > %385 = call float @llvm.fma.f32(float %380, float %383, float %384) > %386 = fmul float %385, %96 > %387 = fmul float %386, 2.000000e+00 > %388 = fmul float %95, %222 > %389 = fmul float %95, %218 > %390 = fmul float %96, %273 > %391 = fmul float %96, %269 > %392 = shl i32 %107, 5 > %393 = or i32 %392, 8 > %394 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %393) > %395 = fmul float %95, %394 > %396 = shl i32 %108, 5 > %397 = or i32 %396, 8 > %398 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %397) > %399 = fmul float %96, %398 > %400 = fadd float %387, %377 > %401 = fadd float %170, %378 > %402 = fadd float %390, %388 > %403 = fadd float %399, %395 > %404 = shl i32 %115, 4 > %405 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %404) > %406 = shl i32 %115, 4 > %407 = or i32 %406, 8 > %408 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %407) > %409 = fsub float -0.000000e+00, %321 > %410 = call float @llvm.fma.f32(float %405, float %408, float %409) > %411 = fmul float %410, %97 > %412 = fmul float %411, 2.000000e+00 > %413 = fmul float %97, %357 > %414 = fmul float %97, %353 > %415 = shl i32 %109, 5 > %416 = or i32 %415, 8 > %417 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %416) > %418 = fmul float %97, %417 > %419 = fadd float %400, %412 > %420 = fadd float %401, %305 > %421 = fadd float %402, %413 > %422 = fadd float %403, %418 > %423 = fmul float %419, %77 > %424 = fmul float %420, %78 > %425 = fadd float %423, %424 > %426 = fmul float %421, %79 > %427 = fadd float %425, %426 > %428 = fadd float %427, %422 > %429 = shl i32 %107, 5 > %430 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %429) > %431 = fmul float %95, %430 > %432 = shl i32 %108, 5 > %433 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %432) > %434 = fmul float %96, 
%433 > %435 = shl i32 %109, 5 > %436 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %435) > %437 = fmul float %97, %436 > %438 = shl i32 %111, 4 > %439 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %438) > %440 = shl i32 %111, 4 > %441 = or i32 %440, 4 > %442 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %441) > %443 = fsub float -0.000000e+00, %184 > %444 = call float @llvm.fma.f32(float %439, float %442, float %443) > %445 = fadd float %186, %185 > %446 = fmul float %444, %95 > %447 = fmul float %445, %95 > %448 = fmul float %446, 2.000000e+00 > %449 = fmul float %447, 2.000000e+00 > %450 = shl i32 %113, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %450) > %452 = shl i32 %113, 4 > %453 = or i32 %452, 4 > %454 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %453) > %455 = fsub float -0.000000e+00, %235 > %456 = call float @llvm.fma.f32(float %451, float %454, float %455) > %457 = shl i32 %115, 4 > %458 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %457) > %459 = shl i32 %115, 4 > %460 = or i32 %459, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %73, i32 %460) > %462 = fsub float -0.000000e+00, %319 > %463 = call float @llvm.fma.f32(float %458, float %461, float %462) > %464 = fadd float %321, %320 > %465 = fmul float %456, %96 > %466 = fmul float %463, %97 > %467 = fmul float %464, %97 > %468 = fmul float %466, 2.000000e+00 > %469 = fmul float %467, 2.000000e+00 > %470 = fadd float %237, %236 > %471 = fmul float %470, %96 > %472 = fmul float %465, 2.000000e+00 > %473 = fmul float %471, 2.000000e+00 > %474 = fadd float %389, %391 > %475 = fadd float %448, %472 > %476 = fadd float %449, %473 > %477 = fadd float %431, %434 > %478 = fadd float %414, %474 > %479 = fadd float %468, %475 > %480 = fadd float %469, %476 > %481 = fadd float %437, %477 > %482 = fmul float %478, %77 > %483 = fmul float %479, %78 > %484 = fadd float %482, %483 > %485 = fmul float %480, %79 > %486 = fadd float %484, %485 > %487 = fadd float %486, %481 > %488 = fmul float %20, %487 > %489 = fmul float %21, %368 > %490 = fadd float %488, %489 > %491 = fmul float %22, %428 > %492 = fadd float %490, %491 > %493 = fadd float %492, %23 > %494 = fmul float %24, %487 > %495 = fmul float %25, %368 > %496 = fadd float %494, %495 > %497 = fmul float %26, %428 > %498 = fadd float %496, %497 > %499 = fadd float %498, %27 > %500 = fmul float %28, %487 > %501 = fmul float %29, %368 > %502 = fadd float %500, %501 > %503 = fmul float %30, %428 > %504 = fadd float %502, %503 > %505 = fadd float %504, %31 > %506 = fmul float %32, %487 > %507 = fmul float %33, %368 > %508 = fadd float %506, %507 > %509 = fmul float %34, %428 > %510 = fadd float %508, %509 > %511 = fadd float %510, %35 > %512 = fsub float %69, %487 > %513 = fsub float %70, %368 > %514 = fsub float %71, %428 > %515 = fmul float %39, %487 > %516 = fmul float %40, %368 > %517 = fadd float %515, %516 > %518 = fmul float %41, %428 > %519 = fadd float %517, %518 > %520 = fadd float %519, %42 > %521 = fadd float %520, %61 > %522 = call float @llvm.fabs.f32(float %511) > %523 = fmul float %522, 0x3EF4F8B580000000 > %524 = call float @llvm.minnum.f32(float %523, float 1.000000e+00) > %525 = fsub float 1.000000e+00, %524 > %526 = fmul float %36, %512 > %527 = fmul float %37, %513 > %528 = fadd float %527, %526 > %529 = fmul float %38, %514 > %530 = fadd float %528, %529 > %531 = fmul float %43, %512 > %532 = fmul float %44, %513 > %533 = fadd float %532, %531 > %534 = fmul float %45, %514 > %535 = fadd float %533, %534 > %536 = fmul 
float %39, %512 > %537 = fmul float %40, %513 > %538 = fadd float %537, %536 > %539 = fmul float %41, %514 > %540 = fadd float %538, %539 > %541 = fmul float %530, %530 > %542 = fmul float %540, %540 > %543 = fadd float %542, %541 > %544 = fmul float %535, %535 > %545 = fadd float %543, %544 > %546 = call float @llvm.AMDGPU.rsq.clamped.f32(float %545) > %547 = fmul float %546, %530 > %548 = fmul float %546, %540 > %549 = fmul float %546, %535 > %550 = fsub float -0.000000e+00, %540 > %551 = call float @llvm.fma.f32(float %550, float %546, float 0xBFC3333340000000) > %552 = fsub float 1.000000e+00, %551 > %553 = call float @llvm.AMDGPU.clamp.(float %552, float 0.000000e+00, float 1.000000e+00) > %554 = fmul float %553, %553 > %555 = fmul float %547, %66 > %556 = fsub float -0.000000e+00, %555 > %557 = fmul float %548, %67 > %558 = fsub float %556, %557 > %559 = fmul float %549, %68 > %560 = fsub float %558, %559 > %561 = fsub float -0.000000e+00, %51 > %562 = call float @llvm.fma.f32(float %561, float %560, float %50) > %563 = call float @llvm.fma.f32(float %560, float %560, float 1.000000e+00) > %564 = fmul float %563, 0x3FAE8EC8A0000000 > %565 = call float @llvm.fabs.f32(float %562) > %566 = call float @llvm.log2.f32(float %565) > %567 = fmul float %566, -1.500000e+00 > %568 = call float @llvm.exp2.f32(float %567) > %569 = fsub float -0.000000e+00, %48 > %570 = call float @llvm.fma.f32(float %52, float %568, float %569) > %571 = fmul float %568, %52 > %572 = call float @llvm.maxnum.f32(float %570, float 0.000000e+00) > %573 = fsub float -0.000000e+00, %572 > %574 = call float @llvm.fma.f32(float %573, float %525, float %571) > %575 = call float @llvm.maxnum.f32(float %574, float %65) > %576 = fcmp une float %46, 0.000000e+00 > br i1 %576, label %IF, label %ELSE > >IF: ; preds = %main_body > %577 = fdiv float 1.000000e+00, %46 > %578 = fmul float %521, %577 > %579 = fsub float -0.000000e+00, %578 > br label %ENDIF > >ELSE: ; preds = %main_body > %580 = fsub float -0.000000e+00, %521 > %581 = fcmp olt float %521, -0.000000e+00 > %582 = select i1 %581, float 1.000000e+00, float %580 > %583 = fcmp oge float %582, 0.000000e+00 > %.op = fmul float %582, 0x4600000000000000 > %584 = select i1 %583, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp40.0 = phi float [ %579, %IF ], [ %584, %ELSE ] > %585 = fmul float %temp40.0, 0x3FF7154760000000 > %586 = call float @llvm.exp2.f32(float %585) > %587 = fadd float %586, %53 > %588 = fmul float %587, %55 > %589 = fmul float %588, 5.000000e-01 > %590 = fmul float %554, %589 > %591 = call float @llvm.minnum.f32(float %590, float %49) > %592 = call float @llvm.maxnum.f32(float %591, float %54) > %593 = fmul float %592, %575 > %594 = fcmp une float %59, 0.000000e+00 > br i1 %594, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %595 = fdiv float 1.000000e+00, %59 > %596 = fmul float %521, %595 > %597 = fsub float -0.000000e+00, %596 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %598 = fsub float -0.000000e+00, %521 > %599 = fcmp olt float %521, -0.000000e+00 > %600 = select i1 %599, float 1.000000e+00, float %598 > %601 = fcmp oge float %600, 0.000000e+00 > %.op164 = fmul float %600, 0x4600000000000000 > %602 = select i1 %601, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp36.0 = phi float [ %597, %IF159 ], [ %602, %ELSE160 ] > %603 = fsub float %60, %521 > %604 = fcmp une float %47, 0.000000e+00 > br i1 %604, label %IF162, label %ELSE163 
> >IF162: ; preds = %ENDIF158 > %605 = fdiv float 1.000000e+00, %47 > %606 = fmul float %603, %605 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %607 = fcmp ogt float %603, 0.000000e+00 > %608 = select i1 %607, float 1.000000e+00, float %603 > %609 = fcmp oge float %608, 0.000000e+00 > %.op165 = fmul float %608, 0x4600000000000000 > %610 = select i1 %609, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp40.1 = phi float [ %606, %IF162 ], [ %610, %ELSE163 ] > %611 = fmul float %temp36.0, 0x3FF7154760000000 > %612 = call float @llvm.exp2.f32(float %611) > %613 = fmul float %612, %56 > %614 = fmul float %612, %57 > %615 = fmul float %612, %58 > %616 = call float @llvm.fma.f32(float %56, float %612, float %592) > %617 = call float @llvm.fma.f32(float %57, float %612, float %592) > %618 = call float @llvm.fma.f32(float %58, float %612, float %592) > %619 = call float @llvm.fma.f32(float %613, float %564, float %593) > %620 = call float @llvm.fma.f32(float %614, float %564, float %593) > %621 = call float @llvm.fma.f32(float %615, float %564, float %593) > %622 = fmul float %616, %temp40.1 > %623 = fmul float %617, %temp40.1 > %624 = fmul float %618, %temp40.1 > %625 = call float @llvm.fabs.f32(float %511) > %626 = call float @llvm.fabs.f32(float %511) > %627 = call float @llvm.fabs.f32(float %511) > %628 = fmul float %616, %625 > %629 = fmul float %617, %626 > %630 = fmul float %618, %627 > %631 = fmul float %628, 0xBFF7154760000000 > %632 = fmul float %629, 0xBFF7154760000000 > %633 = fmul float %630, 0xBFF7154760000000 > %634 = call float @llvm.exp2.f32(float %631) > %635 = call float @llvm.exp2.f32(float %632) > %636 = call float @llvm.exp2.f32(float %633) > %637 = fmul float %622, 0xBFF7154760000000 > %638 = fmul float %623, 0xBFF7154760000000 > %639 = fmul float %624, 0xBFF7154760000000 > %640 = call float @llvm.log2.f32(float %62) > %641 = call float @llvm.log2.f32(float %63) > %642 = call float @llvm.log2.f32(float %64) > %643 = fmul float %640, 0x3FDD1745E0000000 > %644 = fmul float %641, 0x3FDD1745E0000000 > %645 = fmul float %642, 0x3FDD1745E0000000 > %646 = call float @llvm.exp2.f32(float %643) > %647 = call float @llvm.exp2.f32(float %644) > %648 = call float @llvm.exp2.f32(float %645) > %649 = call float @llvm.exp2.f32(float %637) > %650 = call float @llvm.exp2.f32(float %638) > %651 = call float @llvm.exp2.f32(float %639) > %652 = fmul float %649, %646 > %653 = fmul float %650, %647 > %654 = fmul float %651, %648 > %655 = fcmp oeq float %616, 0.000000e+00 > %656 = fcmp oeq float %617, 0.000000e+00 > %657 = fcmp oeq float %618, 0.000000e+00 > %658 = fcmp ogt float %619, 0.000000e+00 > %659 = select i1 %658, float 1.000000e+00, float %619 > %660 = fcmp oge float %659, 0.000000e+00 > %661 = fcmp ogt float %620, 0.000000e+00 > %662 = select i1 %661, float 1.000000e+00, float %620 > %663 = fcmp oge float %662, 0.000000e+00 > %664 = fcmp ogt float %621, 0.000000e+00 > %665 = select i1 %664, float 1.000000e+00, float %621 > %666 = fcmp oge float %665, 0.000000e+00 > %.op166 = fmul float %659, 0x4600000000000000 > %667 = select i1 %660, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %662, 0x4600000000000000 > %668 = select i1 %663, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %665, 0x4600000000000000 > %669 = select i1 %666, float %.op168, float 0xC600000000000000 > %670 = fdiv float 1.000000e+00, %616 > %671 = fdiv float 1.000000e+00, %617 > %672 = fdiv float 1.000000e+00, %618 > %673 = 
fmul float %619, %670 > %674 = fmul float %620, %671 > %675 = fmul float %621, %672 > %676 = select i1 %655, float %667, float %673 > %677 = select i1 %656, float %668, float %674 > %678 = select i1 %657, float %669, float %675 > %679 = fmul float %676, %652 > %680 = fmul float %677, %653 > %681 = fmul float %678, %654 > %682 = fsub float 1.000000e+00, %634 > %683 = fsub float 1.000000e+00, %635 > %684 = fsub float 1.000000e+00, %636 > %685 = call float @llvm.fma.f32(float %679, float %682, float 0xBF70624DE0000000) > %686 = call float @llvm.fma.f32(float %680, float %683, float 0xBF70624DE0000000) > %687 = call float @llvm.fma.f32(float %681, float %684, float 0xBF70624DE0000000) > %688 = call float @llvm.maxnum.f32(float %685, float 0.000000e+00) > %689 = call float @llvm.maxnum.f32(float %686, float 0.000000e+00) > %690 = call float @llvm.maxnum.f32(float %687, float 0.000000e+00) > %691 = call float @llvm.fma.f32(float %688, float 0x4018CCCCC0000000, float 5.000000e-01) > %692 = call float @llvm.fma.f32(float %689, float 0x4018CCCCC0000000, float 5.000000e-01) > %693 = call float @llvm.fma.f32(float %690, float 0x4018CCCCC0000000, float 5.000000e-01) > %694 = fmul float %688, %691 > %695 = fmul float %689, %692 > %696 = fmul float %690, %693 > %697 = call float @llvm.fma.f32(float %688, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %698 = call float @llvm.fma.f32(float %689, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %699 = call float @llvm.fma.f32(float %690, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %700 = call float @llvm.fma.f32(float %688, float %697, float 0x3FAEB851E0000000) > %701 = call float @llvm.fma.f32(float %689, float %698, float 0x3FAEB851E0000000) > %702 = call float @llvm.fma.f32(float %690, float %699, float 0x3FAEB851E0000000) > %703 = fcmp oeq float %700, 0.000000e+00 > %704 = fcmp oeq float %701, 0.000000e+00 > %705 = fcmp oeq float %702, 0.000000e+00 > %706 = fcmp ogt float %694, 0.000000e+00 > %707 = select i1 %706, float 1.000000e+00, float %694 > %708 = fcmp oge float %707, 0.000000e+00 > %709 = fcmp ogt float %695, 0.000000e+00 > %710 = select i1 %709, float 1.000000e+00, float %695 > %711 = fcmp oge float %710, 0.000000e+00 > %712 = fcmp ogt float %696, 0.000000e+00 > %713 = select i1 %712, float 1.000000e+00, float %696 > %714 = fcmp oge float %713, 0.000000e+00 > %.op169 = fmul float %707, 0x4600000000000000 > %715 = select i1 %708, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %710, 0x4600000000000000 > %716 = select i1 %711, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %713, 0x4600000000000000 > %717 = select i1 %714, float %.op171, float 0xC600000000000000 > %718 = fdiv float 1.000000e+00, %700 > %719 = fdiv float 1.000000e+00, %701 > %720 = fdiv float 1.000000e+00, %702 > %721 = fmul float %694, %718 > %722 = fmul float %695, %719 > %723 = fmul float %696, %720 > %724 = select i1 %703, float %715, float %721 > %725 = select i1 %704, float %716, float %722 > %726 = select i1 %705, float %717, float %723 > %727 = bitcast i32 %11 to float > %728 = insertvalue <{ float, float, float }> undef, float %727, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %83, float %84, float %417, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %724, float %725, float %726, float %634) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %88, float %89, float %90, float %91) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, 
i32 0, float %493, float %499, float %505, float %511) > ret <{ float, float, float }> %728 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..27] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 368, 208, 224} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {240, 144, 160, 176} >IMM[3] UINT32 {192, 432, 336, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][23].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][23].zzzz, CONST[1][23].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][13], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][14], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][15], TEMP[0] > 8: MOV TEMP[1].z, TEMP[2].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][9], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][10], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][11], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[2].x, CONST[1][12], TEMP[1] > 16: MOV TEMP[0].w, TEMP[2].xxxx > 17: MOV TEMP[3], TEMP[0] > 18: MOV TEMP[4].zw, TEMP[0].wwzw > 19: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[1][27].zwww > 20: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[1][21].xyyy > 21: MUL TEMP[5].xy, CONST[1][27].zwww, IMM[1].xyyy > 22: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy, TEMP[1].xyyy > 23: MOV TEMP[4].xy, TEMP[0].xyxx > 24: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[1][21].zwww, TEMP[2].xyyy > 25: MOV OUT[2], TEMP[0] > 26: MOV OUT[1], TEMP[4] > 27: MOV OUT[0], TEMP[3] > 28: END >radeonsi: Compiling shader 318 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 144) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 148) > %18 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 152) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 156) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 160) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 164) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 168) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 172) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 188) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 192) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 196) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 200) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 204) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 220) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 236) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 240) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 244) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 248) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 252) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 336) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 340) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 344) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 348) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 368) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 372) > %50 = call float @llvm.SI.load.const(<16 x i8> %15, i32 376) > %51 = call float @llvm.SI.load.const(<16 x i8> %15, i32 440) > %52 = call float @llvm.SI.load.const(<16 x i8> %15, i32 444) > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %13) > %56 = extractelement <4 x float> %55, i32 0 > %57 = extractelement <4 x float> %55, i32 1 > %58 = extractelement <4 x float> %55, i32 2 > %59 = fmul float %56, %48 > %60 = fmul float %57, %49 > %61 = call float @llvm.fma.f32(float %58, float %50, float %50) > %62 = fmul float %32, %59 > %63 = fmul float %33, %60 > %64 = fadd float %62, %63 > %65 = fmul float %34, %61 > %66 = fadd float %64, %65 > %67 = fadd float %66, %35 > %68 = fmul float %36, %59 > %69 = fmul float %37, %60 > %70 = fadd float %68, %69 > %71 = fmul float %38, %61 > %72 = fadd float %70, %71 > %73 = fadd float %72, %39 > %74 = fmul float %40, %59 > %75 = fmul float %41, %60 > %76 = fadd float %74, %75 > %77 = fmul float %42, %61 > %78 = fadd float %76, %77 > %79 = fadd float %78, %43 > %80 = fmul float %16, %67 > %81 = fmul float %17, %73 > %82 = fadd float %80, %81 > %83 = fmul float %18, %79 > %84 = fadd float %82, %83 > %85 = fadd float %84, %19 > %86 = fmul float %20, %67 > %87 = fmul float %21, %73 > %88 = fadd float %86, %87 > %89 = fmul float %22, %79 > %90 = fadd float %88, %89 > %91 = fadd float %90, %23 > %92 = fmul float %24, %67 > 
%93 = fmul float %25, %73 > %94 = fadd float %92, %93 > %95 = fmul float %26, %79 > %96 = fadd float %94, %95 > %97 = fadd float %96, %27 > %98 = fmul float %28, %67 > %99 = fmul float %29, %73 > %100 = fadd float %98, %99 > %101 = fmul float %30, %79 > %102 = fadd float %100, %101 > %103 = fadd float %102, %31 > %104 = fmul float %103, %51 > %105 = fmul float %103, %52 > %106 = fmul float %103, %44 > %107 = fmul float %103, %45 > %108 = fsub float -0.000000e+00, %52 > %109 = call float @llvm.fma.f32(float %85, float %51, float %104) > %110 = call float @llvm.fma.f32(float %91, float %108, float %105) > %111 = call float @llvm.fma.f32(float %109, float %46, float %106) > %112 = call float @llvm.fma.f32(float %110, float %47, float %107) > %113 = bitcast i32 %11 to float > %114 = insertvalue <{ float, float, float }> undef, float %113, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %97, float %103) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %111, float %112, float %97, float %103) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %91, float %97, float %103) > ret <{ float, float, float }> %114 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..27] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.5020, 2.0000} >IMM[1] FLT32 { -1.0000, 1.0000, 0.0000, 0.0000} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {0, 432, 96, 112} >IMM[4] UINT32 {128, 384, 80, 208} >IMM[5] UINT32 {320, 224, 240, 352} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 8: MOV TEMP[2].xy, TEMP[0].xyyy > 9: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D > 10: MOV TEMP[0].z, TEMP[2].xxxx > 11: ADD TEMP[2].x, TEMP[1].wwww, IMM[0].zzzz > 12: FMA TEMP[3].xyz, TEMP[1].xyzz, IMM[0].wwww, IMM[1].xxxx > 13: FSLT TEMP[2].x, -TEMP[2].xxxx, IMM[0].xxxx > 14: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx > 15: INEG TEMP[2].x, TEMP[2].xxxx > 16: USNE TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx > 17: AND TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy > 18: KILL_IF -TEMP[2].xxxx > 19: MOV TEMP[0].w, IMM[1].yyyy > 20: DP2 TEMP[2].x, TEMP[0].zwww, CONST[1][27].xyyy > 21: FSEQ TEMP[4].xy, IN[0].wwww, IMM[0].xxxx > 22: SSG TEMP[5].xy, IN[1].xyyy > 23: MUL TEMP[5].xy, IMM[0].yyyy, TEMP[5].xyyy > 
24: RCP TEMP[6].xy, IN[0].wwww > 25: MUL TEMP[6].xy, IN[1].xyyy, TEMP[6].xyyy > 26: UCMP TEMP[4].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 27: MUL TEMP[0].xy, TEMP[4].xyyy, TEMP[2].xxxx > 28: DP4 TEMP[2].x, CONST[1][6], TEMP[0] > 29: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 30: DP4 TEMP[4].x, CONST[1][7], TEMP[0] > 31: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 32: MOV TEMP[2].y, TEMP[4].xxxx > 33: DP4 TEMP[4].x, CONST[1][8], TEMP[0] > 34: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 35: MOV TEMP[2].z, TEMP[4].xxxx > 36: MOV TEMP[4].xy, TEMP[2].xyyy > 37: TEX TEMP[4], TEMP[4], SAMP[2], 2D > 38: MOV TEMP[2].w, IMM[0].xxxx > 39: MOV TEMP[5].xy, TEMP[2].wzzz > 40: TEX TEMP[5], TEMP[5], SAMP[3], 2D > 41: MUL TEMP[0], TEMP[4], TEMP[5] > 42: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][24].xyzz > 43: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][24].wwww > 44: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 45: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][5].xxxx > 46: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz > 47: RSQ TEMP[4].x, TEMP[4].xxxx > 48: MUL TEMP[1].xyz, TEMP[4].xxxx, TEMP[3].xyzz > 49: DP3 TEMP[2].x, CONST[1][13].xyzz, -CONST[1][20].xyzz > 50: DP3 TEMP[3].x, CONST[1][14].xyzz, -CONST[1][20].xyzz > 51: MOV TEMP[2].y, TEMP[3].xxxx > 52: DP3 TEMP[3].x, CONST[1][15].xyzz, -CONST[1][20].xyzz > 53: MOV TEMP[2].z, TEMP[3].xxxx > 54: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[1].xyzz > 55: ADD TEMP[2].x, TEMP[2].xxxx, -CONST[1][5].yyyy > 56: ADD TEMP[1].x, -CONST[1][5].yyyy, CONST[1][5].zzzz > 57: FSNE TEMP[3].x, TEMP[1].xxxx, IMM[0].xxxx > 58: UIF TEMP[3].xxxx :0 > 59: RCP TEMP[1].x, TEMP[1].xxxx > 60: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx > 61: ELSE :0 > 62: SSG TEMP[2].x, TEMP[2].xxxx > 63: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 64: ENDIF > 65: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 66: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz > 67: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][22].xyzz > 68: MOV TEMP[1].w, TEMP[1].xxxx > 69: MOV TEMP[1].xyz, TEMP[0].xyzx > 70: MOV OUT[0], TEMP[1] > 71: END >radeonsi: Compiling shader 319 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %36 = call float @llvm.SI.load.const(<16 x i8> 
%24, i32 128) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 132) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 136) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 140) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 224) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 228) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 232) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 396) > %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 3 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 7 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 > %81 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %82 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %81, i64 0, i64 11 > %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 > %84 = extractelement <8 x i32> %80, i32 7 > %85 = extractelement <4 x i32> %83, i32 0 > %86 = and i32 %85, %84 > %87 = insertelement <4 x i32> %83, i32 %86, i32 0 > %88 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 > %90 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %91 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %90, i64 0, 
i64 15 > %92 = load <4 x i32>, <4 x i32> addrspace(2)* %91, align 16, !tbaa !0 > %93 = extractelement <8 x i32> %89, i32 7 > %94 = extractelement <4 x i32> %92, i32 0 > %95 = and i32 %94, %93 > %96 = insertelement <4 x i32> %92, i32 %95, i32 0 > %97 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %102 = fcmp oeq float %99, 0.000000e+00 > %103 = fcmp oeq float %99, 0.000000e+00 > %104 = fcmp ogt float %97, 0.000000e+00 > %105 = select i1 %104, float 1.000000e+00, float %97 > %106 = fcmp oge float %105, 0.000000e+00 > %107 = fcmp ogt float %98, 0.000000e+00 > %108 = select i1 %107, float 1.000000e+00, float %98 > %109 = fcmp oge float %108, 0.000000e+00 > %.op = fmul float %105, 0x4600000000000000 > %110 = select i1 %106, float %.op, float 0xC600000000000000 > %.op28 = fmul float %108, 0x4600000000000000 > %111 = select i1 %109, float %.op28, float 0xC600000000000000 > %112 = fdiv float 1.000000e+00, %99 > %113 = fmul float %97, %112 > %114 = fmul float %98, %112 > %115 = select i1 %102, float %110, float %113 > %116 = select i1 %103, float %111, float %114 > %117 = bitcast float %115 to i32 > %118 = bitcast float %116 to i32 > %119 = insertelement <2 x i32> undef, i32 %117, i32 0 > %120 = insertelement <2 x i32> %119, i32 %118, i32 1 > %121 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %120, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %122 = extractelement <4 x float> %121, i32 0 > %123 = extractelement <4 x float> %121, i32 1 > %124 = extractelement <4 x float> %121, i32 2 > %125 = extractelement <4 x float> %121, i32 3 > %126 = bitcast float %115 to i32 > %127 = bitcast float %116 to i32 > %128 = insertelement <2 x i32> undef, i32 %126, i32 0 > %129 = insertelement <2 x i32> %128, i32 %127, i32 1 > %130 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %129, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %131 = extractelement <4 x float> %130, i32 0 > %132 = fadd float %125, 0xBFE0101020000000 > %133 = call float @llvm.fma.f32(float %122, float 2.000000e+00, float -1.000000e+00) > %134 = call float @llvm.fma.f32(float %123, float 2.000000e+00, float -1.000000e+00) > %135 = call float @llvm.fma.f32(float %124, float 2.000000e+00, float -1.000000e+00) > %136 = fcmp ogt float %132, -0.000000e+00 > %137 = select i1 %136, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %137) > %138 = fmul float %131, %59 > %139 = fadd float %138, %60 > %140 = fcmp oeq float %99, 0.000000e+00 > %141 = fcmp oeq float %99, 0.000000e+00 > %142 = fcmp ogt float %100, 0.000000e+00 > %143 = select i1 %142, float 1.000000e+00, float %100 > %144 = fcmp oge float %143, 0.000000e+00 > %145 = fcmp ogt float %101, 0.000000e+00 > %146 = select i1 %145, float 1.000000e+00, float %101 > %147 = fcmp oge float %146, 0.000000e+00 > %.op29 = fmul float %143, 0x4600000000000000 > %148 = select i1 %144, float %.op29, float 0xC600000000000000 > %.op30 = fmul float %146, 0x4600000000000000 > %149 = select i1 %147, float %.op30, float 0xC600000000000000 > %150 = fdiv float 1.000000e+00, %99 > %151 = fmul float %100, %150 > %152 = fmul float %101, %150 > %153 = select i1 %140, float %148, 
float %151 > %154 = select i1 %141, float %149, float %152 > %155 = fmul float %153, %139 > %156 = fmul float %154, %139 > %157 = fmul float %28, %155 > %158 = fmul float %29, %156 > %159 = fadd float %157, %158 > %160 = fmul float %30, %131 > %161 = fadd float %159, %160 > %162 = fadd float %161, %31 > %163 = call float @llvm.AMDGPU.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) > %164 = fmul float %32, %155 > %165 = fmul float %33, %156 > %166 = fadd float %164, %165 > %167 = fmul float %34, %131 > %168 = fadd float %166, %167 > %169 = fadd float %168, %35 > %170 = call float @llvm.AMDGPU.clamp.(float %169, float 0.000000e+00, float 1.000000e+00) > %171 = fmul float %36, %155 > %172 = fmul float %37, %156 > %173 = fadd float %171, %172 > %174 = fmul float %38, %131 > %175 = fadd float %173, %174 > %176 = fadd float %175, %39 > %177 = call float @llvm.AMDGPU.clamp.(float %176, float 0.000000e+00, float 1.000000e+00) > %178 = bitcast float %163 to i32 > %179 = bitcast float %170 to i32 > %180 = insertelement <2 x i32> undef, i32 %178, i32 0 > %181 = insertelement <2 x i32> %180, i32 %179, i32 1 > %182 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %181, <8 x i32> %80, <4 x i32> %87, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %183 = extractelement <4 x float> %182, i32 0 > %184 = extractelement <4 x float> %182, i32 1 > %185 = extractelement <4 x float> %182, i32 2 > %186 = extractelement <4 x float> %182, i32 3 > %187 = bitcast float %177 to i32 > %188 = insertelement <2 x i32> <i32 0, i32 undef>, i32 %187, i32 1 > %189 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %188, <8 x i32> %89, <4 x i32> %96, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %190 = extractelement <4 x float> %189, i32 0 > %191 = extractelement <4 x float> %189, i32 1 > %192 = extractelement <4 x float> %189, i32 2 > %193 = extractelement <4 x float> %189, i32 3 > %194 = fmul float %183, %190 > %195 = fmul float %184, %191 > %196 = fmul float %185, %192 > %197 = fmul float %186, %193 > %198 = fmul float %194, %55 > %199 = fmul float %195, %56 > %200 = fmul float %196, %57 > %201 = fmul float %198, %58 > %202 = fmul float %199, %58 > %203 = fmul float %200, %58 > %204 = fmul float %197, %201 > %205 = fmul float %197, %202 > %206 = fmul float %197, %203 > %207 = fmul float %204, %25 > %208 = fmul float %205, %25 > %209 = fmul float %206, %25 > %210 = fmul float %133, %133 > %211 = fmul float %134, %134 > %212 = fadd float %211, %210 > %213 = fmul float %135, %135 > %214 = fadd float %212, %213 > %215 = call float @llvm.AMDGPU.rsq.clamped.f32(float %214) > %216 = fmul float %215, %133 > %217 = fmul float %215, %134 > %218 = fmul float %215, %135 > %219 = fmul float %49, %40 > %220 = fsub float -0.000000e+00, %219 > %221 = fmul float %50, %41 > %222 = fsub float %220, %221 > %223 = fmul float %51, %42 > %224 = fsub float %222, %223 > %225 = fmul float %49, %43 > %226 = fsub float -0.000000e+00, %225 > %227 = fmul float %50, %44 > %228 = fsub float %226, %227 > %229 = fmul float %51, %45 > %230 = fsub float %228, %229 > %231 = fmul float %49, %46 > %232 = fsub float -0.000000e+00, %231 > %233 = fmul float %50, %47 > %234 = fsub float %232, %233 > %235 = fmul float %51, %48 > %236 = fsub float %234, %235 > %237 = fmul float %224, %216 > %238 = fmul float %230, %217 > %239 = fadd float %238, %237 > %240 = fmul float %236, %218 > %241 = fadd float %239, %240 > %242 = fsub float %241, %26 > %243 = fsub float %27, %26 > %244 = fcmp une float %243, 0.000000e+00 > br i1 
%244, label %IF, label %ELSE > >IF: ; preds = %main_body > %245 = fdiv float 1.000000e+00, %243 > %246 = fmul float %242, %245 > br label %ENDIF > >ELSE: ; preds = %main_body > %247 = fcmp ogt float %242, 0.000000e+00 > %248 = select i1 %247, float 1.000000e+00, float %242 > %249 = fcmp oge float %248, 0.000000e+00 > %.op31 = fmul float %248, 0x4600000000000000 > %250 = select i1 %249, float %.op31, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %246, %IF ], [ %250, %ELSE ] > %251 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %252 = fmul float %251, %207 > %253 = fmul float %251, %208 > %254 = fmul float %251, %209 > %255 = fmul float %252, %52 > %256 = fmul float %253, %53 > %257 = fadd float %256, %255 > %258 = fmul float %254, %54 > %259 = fadd float %257, %258 > %260 = bitcast float %5 to i32 > %261 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %260, 10 > %262 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %261, float %252, 11 > %263 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %262, float %253, 12 > %264 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %263, float %254, 13 > %265 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %264, float %259, 14 > %266 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %265, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %266 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL 
OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], BUFFER, FLOAT >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] INT32 {0, 0, 0, 0} >IMM[2] UINT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[3] > 3: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xyzz > 4: DP2 TEMP[1].x, TEMP[0].wwww, IN[1].wwww > 5: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx > 6: MUL TEMP[1].x, TEMP[1].xxxx, IN[2].wwww > 7: MOV TEMP[1].w, TEMP[1].xxxx > 8: FMA TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww, IN[2].xyzz > 9: MOV TEMP[2].x, IMM[1].xxxx > 10: MOV TEMP[2].w, IMM[2].xxxx > 11: TXF TEMP[2].x, TEMP[2], SAMP[1], BUFFER > 12: MUL TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz > 13: MOV OUT[0], TEMP[1] > 14: END >radeonsi: Compiling shader 320 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = bitcast <8 x i32> addrspace(2)* %32 to <2 x i128> addrspace(2)* > %34 = load <2 x i128>, <2 x i128> addrspace(2)* %33, align 32, !tbaa !0 > %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %49 = bitcast float %35 to i32 > %50 = bitcast float %36 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, 
<8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = fmul float %58, %37 > %63 = fmul float %59, %38 > %64 = fmul float %60, %39 > %65 = fmul float %61, %40 > %66 = fmul float %61, %40 > %67 = fadd float %65, %66 > %68 = call float @llvm.minnum.f32(float %67, float 1.000000e+00) > %69 = fmul float %68, %44 > %70 = call float @llvm.fma.f32(float %62, float %44, float %41) > %71 = call float @llvm.fma.f32(float %63, float %44, float %42) > %72 = call float @llvm.fma.f32(float %64, float %44, float %43) > %73 = extractelement <2 x i128> %34, i32 1 > %74 = bitcast i128 %73 to <16 x i8> > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 0) > %76 = extractelement <4 x float> %75, i32 0 > %77 = fmul float %76, %70 > %78 = fmul float %76, %71 > %79 = fmul float %76, %72 > %80 = bitcast float %5 to i32 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %80, 10 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %77, 11 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %78, 12 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %79, 13 > %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84, float %69, 14 > %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..23] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 368, 208, 224} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {240, 144, 160, 176} 
>IMM[3] UINT32 {192, 0, 0, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][23].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][23].zzzz, CONST[1][23].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][13], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][14], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][15], TEMP[0] > 8: MOV TEMP[1].z, TEMP[0].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][9], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][10], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][11], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[1].x, CONST[1][12], TEMP[1] > 16: MOV TEMP[0].w, TEMP[1].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 321 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 144) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 148) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 152) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 156) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 160) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 164) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 168) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 172) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 188) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 192) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 196) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 200) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 204) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 220) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 236) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 240) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 244) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 248) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 252) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 376) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %44 > %54 = fmul float %51, %45 > %55 = call float @llvm.fma.f32(float %52, float %46, float %46) > %56 = fmul float %32, %53 > %57 = fmul float %33, %54 > %58 = fadd float %56, %57 > %59 = fmul float %34, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %35 > %62 = fmul float %36, %53 > %63 = fmul float %37, %54 > %64 = fadd float %62, %63 > %65 = fmul float %38, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %39 > %68 = fmul float %40, %53 > %69 = fmul float %41, %54 > %70 = fadd float %68, %69 > %71 = fmul float %42, %55 > %72 = fadd float %70, %71 > %73 = fadd float %72, %43 > %74 = fmul float %16, %61 > %75 = fmul float %17, %67 > %76 = fadd float %74, %75 > %77 = fmul float %18, %73 > %78 = fadd float %76, %77 > %79 = fadd float %78, %19 > %80 = fmul float %20, %61 > %81 = fmul float %21, %67 > %82 = fadd float %80, %81 > %83 = fmul float %22, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %23 > %86 = fmul float %24, %61 > %87 = fmul float %25, %67 > %88 = fadd float %86, %87 > %89 = fmul float %26, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %27 > %92 = fmul float %28, %61 > %93 = fmul float %29, %67 > %94 = fadd float %92, %93 > %95 = fmul float %30, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %31 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 322 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, 
align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = 
add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][47] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][48] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][49] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][47] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][48] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][49] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][47] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][48] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][49] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, 
IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, 
TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, 
TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 323 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext 
i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 = load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext 
i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 
= fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = 
and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > %507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float 
@llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 
to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and 
i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > %721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) 
> %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float @llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..49] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 752, 768, 784} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][47] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][48] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][49] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 324 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = 
external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 752) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 756) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 760) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 764) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and 
i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 %88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select 
i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 
> %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} >IMM[2] UINT32 {160, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: 
MOV TEMP[2].xy, IN[2].xyxx > 8: MUL TEMP[0].xyz, IN[5].wwww, IN[5].xyzz > 9: MOV TEMP[0].w, IN[5].wwww > 10: MUL TEMP[0], TEMP[0], CONST[1][10] > 11: DP3 TEMP[3].x, CONST[1][7].xyzz, IN[3].xyzz > 12: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[4].xyzz > 13: MOV TEMP[3].y, TEMP[4].xxxx > 14: DP3 TEMP[4].x, CONST[1][7].xyzz, IN[1].xyzz > 15: MOV TEMP[3].z, TEMP[4].xxxx > 16: DP3 TEMP[4].x, CONST[1][8].xyzz, IN[3].xyzz > 17: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[4].xyzz > 18: MOV TEMP[4].y, TEMP[5].xxxx > 19: DP3 TEMP[5].x, CONST[1][8].xyzz, IN[1].xyzz > 20: MOV TEMP[4].z, TEMP[5].xxxx > 21: DP3 TEMP[5].x, CONST[1][9].xyzz, IN[3].xyzz > 22: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[4].xyzz > 23: MOV TEMP[5].y, TEMP[6].xxxx > 24: DP3 TEMP[6].x, CONST[1][9].xyzz, IN[1].xyzz > 25: MOV TEMP[5].z, TEMP[6].xxxx > 26: MOV OUT[5], TEMP[5] > 27: MOV OUT[4], TEMP[4] > 28: MOV OUT[3], TEMP[3] > 29: MOV OUT[2], TEMP[0] > 30: MOV OUT[1], TEMP[2] > 31: MOV OUT[0], TEMP[1] > 32: END >radeonsi: Compiling shader 325 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 112) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 116) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 120) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 124) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 128) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 132) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 136) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 140) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 144) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 148) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 152) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 156) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 160) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 164) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 168) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 172) > %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 > %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %13) > %40 = extractelement <4 x float> %39, i32 0 > %41 = extractelement <4 x float> %39, i32 1 > %42 = extractelement <4 x float> %39, i32 2 > %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 > %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %14) > %46 = extractelement <4 x float> %45, i32 0 > %47 = extractelement <4 x float> %45, i32 1 > %48 = extractelement <4 x float> %45, i32 2 > %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %50 = load <16 x i8>, <16 x i8> 
addrspace(2)* %49, align 16, !tbaa !0 > %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %15) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %16) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %17) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 > %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %18) > %67 = extractelement <4 x float> %66, i32 0 > %68 = extractelement <4 x float> %66, i32 1 > %69 = extractelement <4 x float> %66, i32 2 > %70 = extractelement <4 x float> %66, i32 3 > %71 = fmul float %21, %40 > %72 = fmul float %22, %41 > %73 = fadd float %71, %72 > %74 = fmul float %23, %42 > %75 = fadd float %73, %74 > %76 = fadd float %75, %24 > %77 = fmul float %25, %40 > %78 = fmul float %26, %41 > %79 = fadd float %77, %78 > %80 = fmul float %27, %42 > %81 = fadd float %79, %80 > %82 = fadd float %81, %28 > %83 = fmul float %29, %40 > %84 = fmul float %30, %41 > %85 = fadd float %83, %84 > %86 = fmul float %31, %42 > %87 = fadd float %85, %86 > %88 = fadd float %87, %32 > %89 = fmul float %70, %67 > %90 = fmul float %70, %68 > %91 = fmul float %70, %69 > %92 = fmul float %89, %33 > %93 = fmul float %90, %34 > %94 = fmul float %91, %35 > %95 = fmul float %70, %36 > %96 = fmul float %21, %55 > %97 = fmul float %22, %56 > %98 = fadd float %97, %96 > %99 = fmul float %23, %57 > %100 = fadd float %98, %99 > %101 = fmul float %21, %61 > %102 = fmul float %22, %62 > %103 = fadd float %102, %101 > %104 = fmul float %23, %63 > %105 = fadd float %103, %104 > %106 = fmul float %21, %46 > %107 = fmul float %22, %47 > %108 = fadd float %107, %106 > %109 = fmul float %23, %48 > %110 = fadd float %108, %109 > %111 = fmul float %25, %55 > %112 = fmul float %26, %56 > %113 = fadd float %112, %111 > %114 = fmul float %27, %57 > %115 = fadd float %113, %114 > %116 = fmul float %25, %61 > %117 = fmul float %26, %62 > %118 = fadd float %117, %116 > %119 = fmul float %27, %63 > %120 = fadd float %118, %119 > %121 = fmul float %25, %46 > %122 = fmul float %26, %47 > %123 = fadd float %122, %121 > %124 = fmul float %27, %48 > %125 = fadd float %123, %124 > %126 = fmul float %29, %55 > %127 = fmul float %30, %56 > %128 = fadd float %127, %126 > %129 = fmul float %31, %57 > %130 = fadd float %128, %129 > %131 = fmul float %29, %61 > %132 = fmul float %30, %62 > %133 = fadd float %132, %131 > %134 = fmul float %31, %63 > %135 = fadd float %133, %134 > %136 = fmul float %29, %46 > %137 = fmul float %30, %47 > %138 = fadd float %137, %136 > %139 = fmul float %31, %48 > %140 = fadd float %138, %139 > %141 = lshr i32 %8, 13 > %142 = and i32 %141, 255 > %143 = mul i32 %142, %10 > %144 = add i32 %143, 16 > %145 = sext i32 %144 to i64 > %146 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %145 > 
%147 = bitcast i32 addrspace(3)* %146 to float addrspace(3)* > store float %76, float addrspace(3)* %147, align 4 > %148 = add i32 %143, 17 > %149 = sext i32 %148 to i64 > %150 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %149 > %151 = bitcast i32 addrspace(3)* %150 to float addrspace(3)* > store float %82, float addrspace(3)* %151, align 4 > %152 = add i32 %143, 18 > %153 = sext i32 %152 to i64 > %154 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %153 > %155 = bitcast i32 addrspace(3)* %154 to float addrspace(3)* > store float %88, float addrspace(3)* %155, align 4 > %156 = add i32 %143, 20 > %bc = bitcast <4 x float> %51 to <4 x i32> > %157 = extractelement <4 x i32> %bc, i32 0 > %158 = sext i32 %156 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %157, i32 addrspace(3)* %159, align 4 > %160 = add i32 %143, 21 > %bc28 = bitcast <4 x float> %51 to <4 x i32> > %161 = extractelement <4 x i32> %bc28, i32 1 > %162 = sext i32 %160 to i64 > %163 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %162 > store i32 %161, i32 addrspace(3)* %163, align 4 > %164 = add i32 %143, 24 > %165 = sext i32 %164 to i64 > %166 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %165 > %167 = bitcast i32 addrspace(3)* %166 to float addrspace(3)* > store float %92, float addrspace(3)* %167, align 4 > %168 = add i32 %143, 25 > %169 = sext i32 %168 to i64 > %170 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %169 > %171 = bitcast i32 addrspace(3)* %170 to float addrspace(3)* > store float %93, float addrspace(3)* %171, align 4 > %172 = add i32 %143, 26 > %173 = sext i32 %172 to i64 > %174 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %173 > %175 = bitcast i32 addrspace(3)* %174 to float addrspace(3)* > store float %94, float addrspace(3)* %175, align 4 > %176 = add i32 %143, 27 > %177 = sext i32 %176 to i64 > %178 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %177 > %179 = bitcast i32 addrspace(3)* %178 to float addrspace(3)* > store float %95, float addrspace(3)* %179, align 4 > %180 = add i32 %143, 28 > %181 = sext i32 %180 to i64 > %182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %181 > %183 = bitcast i32 addrspace(3)* %182 to float addrspace(3)* > store float %100, float addrspace(3)* %183, align 4 > %184 = add i32 %143, 29 > %185 = sext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > %187 = bitcast i32 addrspace(3)* %186 to float addrspace(3)* > store float %105, float addrspace(3)* %187, align 4 > %188 = add i32 %143, 30 > %189 = sext i32 %188 to i64 > %190 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %189 > %191 = bitcast i32 addrspace(3)* %190 to float addrspace(3)* > store float %110, float addrspace(3)* %191, align 4 > %192 = add i32 %143, 32 > %193 = sext i32 %192 to i64 > %194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %193 > %195 = bitcast i32 addrspace(3)* %194 to float addrspace(3)* > store float %115, float addrspace(3)* %195, align 4 > %196 = add i32 %143, 33 > %197 = sext i32 %196 to i64 > %198 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %197 > %199 = bitcast i32 addrspace(3)* %198 to float addrspace(3)* > store float 
%120, float addrspace(3)* %199, align 4 > %200 = add i32 %143, 34 > %201 = sext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = bitcast i32 addrspace(3)* %202 to float addrspace(3)* > store float %125, float addrspace(3)* %203, align 4 > %204 = add i32 %143, 36 > %205 = sext i32 %204 to i64 > %206 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %205 > %207 = bitcast i32 addrspace(3)* %206 to float addrspace(3)* > store float %130, float addrspace(3)* %207, align 4 > %208 = add i32 %143, 37 > %209 = sext i32 %208 to i64 > %210 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %209 > %211 = bitcast i32 addrspace(3)* %210 to float addrspace(3)* > store float %135, float addrspace(3)* %211, align 4 > %212 = add i32 %143, 38 > %213 = sext i32 %212 to i64 > %214 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %213 > %215 = bitcast i32 addrspace(3)* %214 to float addrspace(3)* > store float %140, float addrspace(3)* %215, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 
TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy >101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: 
AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: 
DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy 
>303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, -TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, 
TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 326 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float 
@llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float @llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > 
%89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > 
%176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 
addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul 
nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > %399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 
255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul 
float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = 
zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub 
float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* %848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul 
float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. 
> br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > 
%1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = 
and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 
%1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 
> %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = 
select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label 
%ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > 
store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > 
%1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 
0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = 
call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV 
TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 327 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 
= bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load 
float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > 
%345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, 
float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 
> %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > 
%723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 > %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = 
lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float %850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) 
> >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..24] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 384, 0, 0} >IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][24].yyyy > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IN[4].xyzz > 21: MOV TEMP[2].w, TEMP[1].wwww > 22: MOV TEMP[1].w, IMM[2].xxxx > 23: MOV TEMP[3].xy, IN[0].xyyy > 24: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D > 25: MUL TEMP[1].x, TEMP[3].zzzz, CONST[1][24].xxxx > 26: MOV TEMP[1].yz, TEMP[3].xyxx > 27: MOV OUT[0], TEMP[0] > 28: MOV OUT[1], TEMP[2] > 29: MOV OUT[2], TEMP[1] > 30: END >radeonsi: Compiling shader 328 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], 
[0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 11 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %68 = bitcast float %54 to i32 > %69 = bitcast float %55 to i32 > %70 = insertelement <2 x i32> undef, i32 %68, i32 0 > %71 = insertelement <2 x i32> %70, i32 %69, i32 1 > %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %73 = extractelement <4 x float> %72, i32 1 > %74 = extractelement <4 x float> %72, i32 3 > %75 = call float @llvm.fma.f32(float %73, float 2.000000e+00, float -1.000000e+00) > %76 = call float @llvm.fma.f32(float %74, float 2.000000e+00, float -1.000000e+00) > %77 = fsub float -0.000000e+00, %75 > %78 = call float @llvm.fma.f32(float %77, float %75, float 1.000000e+00) > %79 = fsub float -0.000000e+00, %76 > %80 = call float @llvm.fma.f32(float %79, float %76, float %78) > %81 = call float @llvm.sqrt.f32(float %80) > %82 = fmul float %56, %75 > %83 = fmul float %57, %76 > %84 = fadd float %83, %82 > %85 = fmul float %58, %81 > %86 = fadd float %84, %85 > %87 = fmul float %59, %75 > %88 = fmul float %60, %76 > %89 = fadd float %88, %87 > %90 = fmul float %61, 
%81 > %91 = fadd float %89, %90 > %92 = fmul float %62, %75 > %93 = fmul float %63, %76 > %94 = fadd float %93, %92 > %95 = fmul float %64, %81 > %96 = fadd float %94, %95 > %97 = fmul float %86, %86 > %98 = fmul float %91, %91 > %99 = fadd float %98, %97 > %100 = fmul float %96, %96 > %101 = fadd float %99, %100 > %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) > %103 = fmul float %102, %86 > %104 = fmul float %102, %91 > %105 = fmul float %102, %96 > %106 = call float @llvm.fma.f32(float %103, float 5.000000e-01, float 5.000000e-01) > %107 = call float @llvm.fma.f32(float %104, float 5.000000e-01, float 5.000000e-01) > %108 = call float @llvm.fma.f32(float %105, float 5.000000e-01, float 5.000000e-01) > %109 = bitcast float %54 to i32 > %110 = bitcast float %55 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = extractelement <4 x float> %113, i32 1 > %116 = extractelement <4 x float> %113, i32 2 > %117 = extractelement <4 x float> %113, i32 3 > %118 = fmul float %114, %65 > %119 = fmul float %115, %66 > %120 = fmul float %116, %67 > %121 = bitcast float %54 to i32 > %122 = bitcast float %55 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 0 > %127 = extractelement <4 x float> %125, i32 1 > %128 = extractelement <4 x float> %125, i32 2 > %129 = fmul float %128, %25 > %130 = bitcast float %5 to i32 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %130, 10 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %106, 11 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %107, 12 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %108, 13 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %26, 14 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %118, 15 > %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %119, 16 > %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %120, 17 > %139 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138, float %117, 18 > %140 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %139, float %129, 19 > %141 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %140, float %127, 20 > %142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %141, float %126, 21 > %143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float 0.000000e+00, 22 > %144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL CONST[1][0..9] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 112, 128, 144} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][7], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][8], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[0].x, CONST[1][9], TEMP[0] > 6: MOV TEMP[1].z, TEMP[0].xxxx > 7: MOV TEMP[0].xy, IN[1].xyxx > 8: MOV OUT[1], TEMP[0] > 9: MOV OUT[0], TEMP[1] > 10: END >radeonsi: Compiling shader 329 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 112) > %18 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 116) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 120) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 124) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 128) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 132) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 136) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 140) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 144) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 148) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 152) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 156) > %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 > %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %13) > %32 = extractelement <4 x float> %31, i32 0 > %33 = extractelement <4 x float> %31, i32 1 > %34 = extractelement <4 x float> %31, i32 2 > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %14) > %38 = fmul float %17, %32 > %39 = fmul float %18, %33 > %40 = fadd float %38, %39 > %41 = fmul float %19, %34 > %42 = fadd float %40, %41 > %43 = fadd float %42, %20 > %44 = fmul float %21, %32 > %45 = fmul float %22, %33 > %46 = fadd float %44, %45 > %47 = fmul float %23, %34 > %48 = fadd float %46, %47 > %49 = fadd float %48, %24 > %50 = fmul float %25, %32 > %51 = fmul float %26, %33 > %52 = fadd float %50, %51 > %53 = fmul float %27, %34 > %54 = fadd float %52, %53 > %55 = fadd float %54, %28 > %56 = lshr i32 %8, 13 > %57 = and i32 %56, 255 > %58 = mul i32 %57, %10 > %59 = add i32 %58, 16 > %60 = sext i32 %59 to i64 > %61 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %60 > %62 = bitcast i32 addrspace(3)* %61 to float addrspace(3)* > store float %43, float addrspace(3)* %62, align 4 > %63 = add i32 %58, 17 > %64 = sext i32 %63 to i64 > %65 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %64 > %66 = bitcast i32 addrspace(3)* %65 to float addrspace(3)* > store float %49, float addrspace(3)* %66, align 4 > %67 = add i32 %58, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > store float %55, float addrspace(3)* %70, align 4 > %71 = add i32 %58, 20 > %bc = bitcast <4 x float> %37 to <4 x i32> > %72 = extractelement <4 x i32> %bc, i32 0 > %73 = sext i32 %71 to i64 > %74 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %73 > store i32 %72, i32 addrspace(3)* %74, align 4 > %75 = add i32 %58, 21 > %bc12 = bitcast <4 x float> %37 to <4 x i32> > %76 = extractelement <4 x i32> %bc12, i32 1 > %77 = sext i32 %75 to i64 > %78 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %77 > store i32 %76, i32 addrspace(3)* %78, align 4 > %79 = add i32 %58, 22 > %80 = sext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = bitcast i32 addrspace(3)* %81 to float addrspace(3)* > store float %34, float addrspace(3)* %82, align 4 > %83 = add i32 %58, 23 > %84 = sext i32 %83 to i64 > %85 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %84 > store i32 1065353216, i32 addrspace(3)* %85, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..3], ARRAY(1), GENERIC[0] >DCL OUT[4], PATCH >DCL OUT[5], PATCH[1] >DCL OUT[6], PATCH[2] >DCL OUT[7], PATCH[3] >DCL CONST[1][0..50] >DCL CONST[2][0..39] >DCL TEMP[0..10], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 0.0000, 0.4000, 100.0000} >IMM[1] UINT32 {0, 768, 784, 800} >IMM[2] UINT32 {1, 624, 0, 0} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 63.0000, 0.0000, 0.0000} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx > 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: MOV TEMP[0].xyz, IN[0][0].xyzx > 11: MOV TEMP[0].w, IMM[0].xxxx > 12: MOV TEMP[1], CONST[1][48] > 13: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 14: MOV TEMP[2], CONST[1][49] > 15: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 16: MOV TEMP[3], CONST[1][50] > 17: DP4 TEMP[0].x, TEMP[3], TEMP[0] > 18: MOV TEMP[4].xyz, IN[1][0].xyzx > 19: MOV TEMP[4].w, IMM[0].xxxx > 20: MOV TEMP[5], CONST[1][48] > 21: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 22: MOV TEMP[0].y, TEMP[5].xxxx > 23: MOV TEMP[5], CONST[1][49] > 24: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 25: MOV TEMP[0].z, TEMP[5].xxxx > 26: MOV TEMP[5], CONST[1][50] > 27: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 28: MOV TEMP[0].w, TEMP[5].xxxx > 29: MOV TEMP[4].xyz, IN[2][0].xyzx > 30: MOV TEMP[4].w, IMM[0].xxxx > 31: MOV TEMP[5], CONST[1][48] > 32: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 33: MOV TEMP[3].z, TEMP[5].xxxx > 34: MOV TEMP[6], CONST[1][49] > 35: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 36: MOV TEMP[7].z, CONST[2][39] > 37: MUL TEMP[7].xy, TEMP[0].xwww, TEMP[7].zzzz > 38: MOV TEMP[0].xw, TEMP[7].xxxy > 39: MOV TEMP[7], CONST[1][50] > 40: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 41: MOV TEMP[8].z, CONST[2][39] > 42: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].zzzz > 43: MOV TEMP[7].x, CONST[2][39] > 44: FSLT TEMP[7].x, TEMP[1].xxxx, -TEMP[7].xxxx > 45: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 46: INEG TEMP[7].x, TEMP[7].xxxx > 47: MOV TEMP[4].y, TEMP[7].xxxx > 48: MOV TEMP[7].x, CONST[2][39] > 49: FSLT TEMP[7].xy, TEMP[0].yzzz, -TEMP[7].xxxx > 50: AND TEMP[7].xy, TEMP[7].xyyy, IMM[3].xxxx > 51: INEG TEMP[7].xy, TEMP[7].xyyy > 52: MOV TEMP[4].zw, TEMP[7].yyxy > 53: AND TEMP[7].xy, TEMP[4].yzzz, IMM[2].xxxx > 54: MOV TEMP[4].yz, TEMP[7].yxyy > 55: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 56: MOV TEMP[4].y, TEMP[7].xxxx > 57: MOV TEMP[7].x, CONST[2][39] > 58: FSLT TEMP[7].x, TEMP[5].xxxx, -TEMP[7].xxxx > 59: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 60: INEG TEMP[7].x, TEMP[7].xxxx > 61: MOV TEMP[4].z, TEMP[7].xxxx > 62: AND TEMP[7].x, TEMP[4].zzzz, IMM[2].xxxx > 63: MOV TEMP[4].z, TEMP[7].xxxx > 64: UADD TEMP[7].x, TEMP[4].zzzz, TEMP[4].yyyy > 
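
The first "LLVM triggered Diagnostic Handler: LDS size exceeds device maximum" / "radeonsi: can't create a shader" pair above (at the end of shader 329's IR) is the interesting part of this dump, and the declared @tess_lds = [8320 x i32] explains it: 8320 dwords is 33,280 bytes of LDS for the merged LS/HS work group. Below is a minimal sketch of that arithmetic, assuming a 32 KiB (32,768-byte) per-work-group LDS limit; the limit itself is not printed in this log, but any limit of 32 KiB or less would trip the same diagnostic.

#include <stdio.h>

int main(void)
{
    const unsigned lds_dwords = 8320;           /* size of @tess_lds in the IR above */
    const unsigned lds_bytes  = lds_dwords * 4; /* each i32 element is 4 bytes       */
    const unsigned lds_limit  = 32 * 1024;      /* assumed per-work-group maximum    */

    printf("@tess_lds needs %u bytes, assumed limit %u bytes -> %s\n",
           lds_bytes, lds_limit,
           lds_bytes > lds_limit ? "exceeds device maximum" : "fits");
    return 0;
}

This prints "@tess_lds needs 33280 bytes, assumed limit 32768 bytes -> exceeds device maximum". Incidentally, the "store i32 1065353216" near the end of that function body is just the bit pattern 0x3F800000, i.e. the 1.0 from IMM[0].xxxx that the TGSI moved into the .w component before the output was written to LDS.
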
65: MOV TEMP[4].y, TEMP[7].xxxx > 66: MOV TEMP[7].x, CONST[2][39] > 67: FSLT TEMP[7].x, TEMP[2].xxxx, -TEMP[7].xxxx > 68: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 69: INEG TEMP[7].x, TEMP[7].xxxx > 70: MOV TEMP[4].z, TEMP[7].xxxx > 71: AND TEMP[7].xy, TEMP[4].zwww, IMM[2].xxxx > 72: MOV TEMP[4].zw, TEMP[7].yyxy > 73: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 74: MOV TEMP[4].z, TEMP[7].xxxx > 75: MOV TEMP[7].x, CONST[2][39] > 76: FSLT TEMP[7].x, TEMP[6].xxxx, -TEMP[7].xxxx > 77: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 78: INEG TEMP[7].x, TEMP[7].xxxx > 79: MOV TEMP[4].w, TEMP[7].xxxx > 80: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 81: MOV TEMP[4].w, TEMP[7].xxxx > 82: UADD TEMP[7].x, TEMP[4].wwww, TEMP[4].zzzz > 83: MOV TEMP[4].z, TEMP[7].xxxx > 84: FSLT TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy > 85: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 86: INEG TEMP[7].x, TEMP[7].xxxx > 87: MOV TEMP[4].w, TEMP[7].xxxx > 88: AND TEMP[7].x, TEMP[4].wwww, IMM[2].xxxx > 89: MOV TEMP[4].w, TEMP[7].xxxx > 90: FSLT TEMP[7].x, TEMP[0].wwww, IMM[0].yyyy > 91: AND TEMP[7].x, TEMP[7].xxxx, IMM[3].xxxx > 92: INEG TEMP[7].x, TEMP[7].xxxx > 93: MOV TEMP[7].x, TEMP[7].xxxx > 94: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx > 95: MOV TEMP[7].x, TEMP[8].xxxx > 96: UADD TEMP[8].x, TEMP[4].wwww, TEMP[7].xxxx > 97: MOV TEMP[4].w, TEMP[8].xxxx > 98: FSLT TEMP[8].x, TEMP[4].xxxx, IMM[0].yyyy > 99: AND TEMP[8].x, TEMP[8].xxxx, IMM[3].xxxx >100: INEG TEMP[8].x, TEMP[8].xxxx >101: MOV TEMP[7].x, TEMP[8].xxxx >102: AND TEMP[8].x, TEMP[7].xxxx, IMM[2].xxxx >103: MOV TEMP[7].x, TEMP[8].xxxx >104: UADD TEMP[7].x, TEMP[4].wwww, TEMP[7].xxxx >105: MOV TEMP[4].w, TEMP[7].xxxx >106: MOV TEMP[7].x, CONST[2][39] >107: FSLT TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx >108: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >109: INEG TEMP[1].x, TEMP[1].xxxx >110: MOV TEMP[3].x, TEMP[1].xxxx >111: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >112: MOV TEMP[3].x, TEMP[1].xxxx >113: MOV TEMP[1].x, CONST[2][39] >114: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].yzzz >115: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >116: INEG TEMP[1].xy, TEMP[1].xyyy >117: MOV TEMP[0].yz, TEMP[1].yxyy >118: AND TEMP[1].xy, TEMP[0].yzzz, IMM[2].xxxx >119: MOV TEMP[0].yz, TEMP[1].yxyy >120: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >121: MOV TEMP[0].y, TEMP[1].xxxx >122: MOV TEMP[1].x, CONST[2][39] >123: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx >124: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >125: INEG TEMP[1].x, TEMP[1].xxxx >126: MOV TEMP[3].x, TEMP[1].xxxx >127: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >128: MOV TEMP[3].x, TEMP[1].xxxx >129: UADD TEMP[1].x, TEMP[0].yyyy, TEMP[3].xxxx >130: MOV TEMP[0].y, TEMP[1].xxxx >131: MOV TEMP[1].x, CONST[2][39] >132: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >133: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >134: INEG TEMP[1].x, TEMP[1].xxxx >135: MOV TEMP[3].x, TEMP[1].xxxx >136: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >137: MOV TEMP[3].x, TEMP[1].xxxx >138: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >139: MOV TEMP[0].z, TEMP[1].xxxx >140: MOV TEMP[1].x, CONST[2][39] >141: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx >142: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >143: INEG TEMP[1].x, TEMP[1].xxxx >144: MOV TEMP[3].x, TEMP[1].xxxx >145: AND TEMP[1].x, TEMP[3].xxxx, IMM[2].xxxx >146: MOV TEMP[3].x, TEMP[1].xxxx >147: UADD TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >148: MOV TEMP[0].z, TEMP[1].xxxx >149: MOV TEMP[1].x, CONST[2][39] >150: FSLT TEMP[1].xy, TEMP[1].xxxx, TEMP[0].xwww >151: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >152: INEG 
TEMP[1].xy, TEMP[1].xyyy >153: MOV TEMP[3].xy, TEMP[1].xyxx >154: AND TEMP[1].xy, TEMP[3].xyyy, IMM[2].xxxx >155: MOV TEMP[3].xy, TEMP[1].xyxx >156: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >157: MOV TEMP[3].x, TEMP[1].xxxx >158: MOV TEMP[1].x, CONST[2][39] >159: FSLT TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx >160: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >161: INEG TEMP[1].x, TEMP[1].xxxx >162: MOV TEMP[3].y, TEMP[1].xxxx >163: AND TEMP[1].x, TEMP[3].yyyy, IMM[2].xxxx >164: MOV TEMP[3].y, TEMP[1].xxxx >165: UADD TEMP[1].x, TEMP[3].yyyy, TEMP[3].xxxx >166: MOV TEMP[3].x, TEMP[1].xxxx >167: USEQ TEMP[1].x, TEMP[4].yyyy, IMM[3].yyyy >168: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >169: INEG TEMP[1].x, TEMP[1].xxxx >170: MOV TEMP[3].y, TEMP[1].xxxx >171: USEQ TEMP[1].xy, TEMP[0].yzzz, IMM[3].yyyy >172: AND TEMP[1].xy, TEMP[1].xyyy, IMM[3].xxxx >173: INEG TEMP[1].xy, TEMP[1].xyyy >174: MOV TEMP[0].yz, TEMP[1].yxyy >175: OR TEMP[1].x, TEMP[0].yyyy, TEMP[3].yyyy >176: MOV TEMP[0].y, TEMP[1].xxxx >177: USEQ TEMP[1].x, TEMP[4].zzzz, IMM[3].yyyy >178: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >179: INEG TEMP[1].x, TEMP[1].xxxx >180: MOV TEMP[3].y, TEMP[1].xxxx >181: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].yyyy >182: MOV TEMP[0].z, TEMP[1].xxxx >183: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >184: MOV TEMP[0].y, TEMP[1].xxxx >185: USEQ TEMP[1].x, TEMP[4].wwww, IMM[3].yyyy >186: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >187: INEG TEMP[1].x, TEMP[1].xxxx >188: MOV TEMP[0].z, TEMP[1].xxxx >189: USEQ TEMP[1].x, TEMP[3].xxxx, IMM[3].yyyy >190: AND TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx >191: INEG TEMP[1].x, TEMP[1].xxxx >192: MOV TEMP[3].x, TEMP[1].xxxx >193: OR TEMP[1].x, TEMP[0].zzzz, TEMP[3].xxxx >194: MOV TEMP[0].z, TEMP[1].xxxx >195: OR TEMP[1].x, TEMP[0].zzzz, TEMP[0].yyyy >196: MOV TEMP[0].y, TEMP[1].xxxx >197: MOV TEMP[1].x, TEMP[0].yyyy >198: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx >199: UIF TEMP[1].xxxx :0 >200: MOV TEMP[1].x, IMM[0].yyyy >201: MOV TEMP[2].x, IMM[0].yyyy >202: MOV TEMP[5].x, IMM[0].yyyy >203: MOV TEMP[6].x, IMM[0].yyyy >204: ELSE :0 >205: ADD TEMP[3].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >206: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >207: MOV TEMP[0].y, TEMP[7].xxxx >208: ADD TEMP[3].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >209: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz >210: MOV TEMP[0].z, TEMP[7].xxxx >211: SQRT TEMP[7].x, TEMP[0].yyyy >212: SQRT TEMP[7].y, TEMP[0].zzzz >213: MOV TEMP[7].xy, TEMP[7].xyxx >214: ADD TEMP[3].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >215: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz >216: SQRT TEMP[8].x, TEMP[3].xxxx >217: MIN TEMP[9].x, TEMP[0].wwww, TEMP[0].xxxx >218: MIN TEMP[10].x, TEMP[0].wwww, TEMP[4].xxxx >219: MOV TEMP[0].w, TEMP[10].xxxx >220: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >221: MUL TEMP[4].x, TEMP[9].xxxx, IMM[0].zzzz >222: MOV TEMP[3].y, TEMP[4].xxxx >223: MAX TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww >224: MUL TEMP[4].xy, TEMP[0].xwww, IMM[0].zzzz >225: MOV TEMP[0].xw, TEMP[4].xxxy >226: MAX TEMP[4].xy, TEMP[0].xwww, IMM[0].wwww >227: FSNE TEMP[9].x, TEMP[3].xxxx, IMM[0].yyyy >228: UIF TEMP[9].xxxx :0 >229: RCP TEMP[3].x, TEMP[3].xxxx >230: MUL TEMP[3].x, TEMP[7].xxxx, TEMP[3].xxxx >231: ELSE :0 >232: SSG TEMP[9].x, TEMP[7].xxxx >233: MUL TEMP[3].x, IMM[4].xxxx, TEMP[9].xxxx >234: ENDIF >235: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >236: MOV TEMP[0].y, TEMP[3].xxxx >237: FSNE TEMP[3].x, TEMP[4].yyyy, IMM[0].yyyy >238: UIF TEMP[3].xxxx :0 >239: RCP TEMP[3].x, TEMP[4].yyyy >240: MUL TEMP[3].x, TEMP[7].yyyy, TEMP[3].xxxx >241: ELSE :0 >242: SSG 
TEMP[7].x, TEMP[7].yyyy >243: MUL TEMP[3].x, IMM[4].xxxx, TEMP[7].xxxx >244: ENDIF >245: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx >246: MOV TEMP[0].z, TEMP[3].xxxx >247: FSNE TEMP[3].x, TEMP[4].xxxx, IMM[0].yyyy >248: UIF TEMP[3].xxxx :0 >249: RCP TEMP[3].x, TEMP[4].xxxx >250: MUL TEMP[3].x, TEMP[8].xxxx, TEMP[3].xxxx >251: ELSE :0 >252: SSG TEMP[4].x, TEMP[8].xxxx >253: MUL TEMP[3].x, IMM[4].xxxx, TEMP[4].xxxx >254: ENDIF >255: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].xxxx >256: MIN TEMP[0].xyz, TEMP[0].xyzz, IMM[4].yyyy >257: MAX TEMP[3].x, TEMP[0].yyyy, TEMP[0].xxxx >258: MOV TEMP[0].w, TEMP[3].xxxx >259: MAX TEMP[6].x, TEMP[0].wwww, TEMP[0].zzzz >260: MOV TEMP[1].x, TEMP[0].zzzz >261: MOV TEMP[2].x, TEMP[0].xxxx >262: MOV TEMP[5].x, TEMP[0].yyyy >263: ENDIF >264: MOV OUT[4], TEMP[1] >265: MOV OUT[5], TEMP[2] >266: MOV OUT[6], TEMP[5] >267: MOV OUT[7], TEMP[6] >268: MOV OUT[0].x, TEMP[1].xxxx >269: MOV OUT[0].y, TEMP[2].xxxx >270: MOV OUT[0].z, TEMP[5].xxxx >271: MOV OUT[1].x, TEMP[6].xxxx >272: END >radeonsi: Compiling shader 330 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 804) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 808) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 812) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = lshr i32 %10, 8 > %30 = and i32 %29, 31 > %31 = lshr i32 %7, 13 > %32 = and i32 %31, 255 > %33 = and i32 %7, 8191 > %34 = and i32 %10, 255 > %35 = mul nuw nsw i32 %33, %34 > %36 = mul nuw nsw i32 %30, %32 > %37 = add nuw nsw i32 %35, %36 > %38 = add nuw nsw i32 %37, 16 > %39 = zext i32 %38 to i64 > %40 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %39 > %41 = load i32, i32 addrspace(3)* %40, align 4 > %42 = lshr i32 %7, 13 > %43 = and i32 %42, 255 > %44 = and i32 %7, 8191 > %45 = and i32 %10, 255 > %46 = mul nuw nsw i32 %44, %45 > %47 = mul nuw nsw i32 %30, %43 > %48 = add nuw nsw i32 %46, %47 > %49 = add nuw nsw i32 %48, 17 > %50 = zext i32 %49 to i64 > %51 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %50 > %52 = load i32, i32 addrspace(3)* %51, align 4 > %53 = lshr i32 %7, 13 > %54 = and i32 %53, 255 > %55 = and i32 %7, 8191 > %56 = and i32 %10, 255 > %57 = mul nuw nsw i32 %55, %56 > %58 = mul nuw nsw i32 %30, %54 > %59 = add nuw nsw i32 %57, %58 > %60 = add nuw nsw i32 %59, 18 > %61 = zext i32 %60 to i64 > %62 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %61 > %63 = load i32, i32 addrspace(3)* %62, align 4 > %64 = lshr i32 %7, 13 > %65 = and i32 %64, 255 > %66 = and i32 %7, 8191 > %67 = and i32 %10, 255 > %68 = mul nuw nsw i32 %66, %67 > %69 = mul nuw nsw i32 %30, %65 > %70 = add nuw nsw i32 %68, %69 > %71 = add nuw nsw i32 %70, 19 > %72 = zext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = load i32, i32 addrspace(3)* %73, align 4 > %75 = lshr i32 %6, 13 > %76 = and i32 %75, 255 > %77 = shl i32 %5, 2 > %78 = and i32 %77, 262140 > %79 = and i32 %6, 8191 > %80 = and i32 %10, 255 > %81 = mul nuw nsw i32 %79, %80 > %82 = add nuw nsw i32 %78, %81 > %83 = mul nuw nsw i32 %30, %76 > %84 = add nuw nsw i32 %82, %83 > %85 = add nuw nsw i32 %84, 16 > %86 = zext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > store i32 %41, i32 addrspace(3)* %87, align 4 > %88 = add nuw nsw i32 %84, 17 > %89 = zext i32 %88 to i64 > %90 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %89 > store i32 %52, i32 addrspace(3)* %90, align 4 > %91 = add nuw nsw i32 %84, 18 > %92 = zext i32 %91 to i64 > %93 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %92 > store i32 %63, i32 addrspace(3)* %93, align 4 > %94 = add nuw nsw i32 %84, 19 > %95 = zext i32 %94 to i64 > %96 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %95 > store i32 %74, i32 addrspace(3)* %96, align 4 > %97 = lshr i32 %7, 13 > %98 = and i32 %97, 255 > %99 = and i32 %7, 8191 > %100 = and i32 %10, 255 > %101 = mul nuw nsw i32 %99, %100 > %102 = mul nuw nsw i32 %30, %98 > %103 = add nuw nsw i32 %101, %102 > %104 = add nuw nsw i32 %103, 20 > %105 = zext i32 %104 to i64 > %106 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %105 > %107 = load i32, i32 addrspace(3)* %106, align 4 > %108 = lshr i32 %7, 13 > %109 = and i32 %108, 255 > %110 = and i32 %7, 8191 > %111 = and i32 %10, 255 > %112 = mul nuw nsw i32 %110, %111 > %113 = mul nuw nsw i32 %30, %109 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 21 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > %118 = load i32, i32 addrspace(3)* %117, align 4 > %119 = lshr i32 %7, 13 > %120 = and i32 %119, 255 > %121 = and i32 %7, 8191 > %122 = and i32 %10, 255 > %123 = mul nuw nsw i32 %121, %122 > %124 = mul nuw nsw i32 %30, %120 > %125 = add nuw nsw i32 %123, %124 > %126 = add nuw nsw i32 %125, 22 > %127 = zext i32 %126 to i64 > %128 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %127 > %129 = load i32, i32 addrspace(3)* %128, align 4 > %130 = lshr i32 %7, 13 > %131 = and i32 %130, 255 > %132 = and i32 %7, 8191 > %133 = and i32 %10, 255 > %134 = mul nuw nsw i32 %132, %133 > %135 = mul nuw nsw i32 %30, %131 > %136 = add nuw nsw i32 %134, %135 > %137 = add nuw nsw i32 %136, 23 > %138 = zext i32 %137 to i64 > %139 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %138 > %140 
= load i32, i32 addrspace(3)* %139, align 4 > %141 = lshr i32 %6, 13 > %142 = and i32 %141, 255 > %143 = shl i32 %5, 2 > %144 = and i32 %143, 262140 > %145 = and i32 %6, 8191 > %146 = and i32 %10, 255 > %147 = mul nuw nsw i32 %145, %146 > %148 = add nuw nsw i32 %144, %147 > %149 = mul nuw nsw i32 %30, %142 > %150 = add nuw nsw i32 %148, %149 > %151 = add nuw nsw i32 %150, 20 > %152 = zext i32 %151 to i64 > %153 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %152 > store i32 %107, i32 addrspace(3)* %153, align 4 > %154 = add nuw nsw i32 %150, 21 > %155 = zext i32 %154 to i64 > %156 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %155 > store i32 %118, i32 addrspace(3)* %156, align 4 > %157 = add nuw nsw i32 %150, 22 > %158 = zext i32 %157 to i64 > %159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %158 > store i32 %129, i32 addrspace(3)* %159, align 4 > %160 = add nuw nsw i32 %150, 23 > %161 = zext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > store i32 %140, i32 addrspace(3)* %162, align 4 > %163 = and i32 %7, 8191 > %164 = and i32 %10, 255 > %165 = mul nuw nsw i32 %163, %164 > %166 = add nuw nsw i32 %165, 16 > %167 = zext i32 %166 to i64 > %168 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %167 > %169 = bitcast i32 addrspace(3)* %168 to float addrspace(3)* > %170 = load float, float addrspace(3)* %169, align 4 > %171 = and i32 %7, 8191 > %172 = and i32 %10, 255 > %173 = mul nuw nsw i32 %171, %172 > %174 = add nuw nsw i32 %173, 17 > %175 = zext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load float, float addrspace(3)* %177, align 4 > %179 = and i32 %7, 8191 > %180 = and i32 %10, 255 > %181 = mul nuw nsw i32 %179, %180 > %182 = add nuw nsw i32 %181, 18 > %183 = zext i32 %182 to i64 > %184 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %183 > %185 = bitcast i32 addrspace(3)* %184 to float addrspace(3)* > %186 = load float, float addrspace(3)* %185, align 4 > %187 = fmul float %13, %170 > %188 = fmul float %14, %178 > %189 = fadd float %187, %188 > %190 = fmul float %15, %186 > %191 = fadd float %189, %190 > %192 = fadd float %191, %16 > %193 = fmul float %17, %170 > %194 = fmul float %18, %178 > %195 = fadd float %193, %194 > %196 = fmul float %19, %186 > %197 = fadd float %195, %196 > %198 = fadd float %197, %20 > %199 = fmul float %21, %170 > %200 = fmul float %22, %178 > %201 = fadd float %199, %200 > %202 = fmul float %23, %186 > %203 = fadd float %201, %202 > %204 = fadd float %203, %24 > %205 = lshr i32 %7, 13 > %206 = and i32 %205, 255 > %207 = and i32 %7, 8191 > %208 = and i32 %10, 255 > %209 = mul nuw nsw i32 %207, %208 > %210 = add nuw nsw i32 %209, %206 > %211 = add nuw nsw i32 %210, 16 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = bitcast i32 addrspace(3)* %213 to float addrspace(3)* > %215 = load float, float addrspace(3)* %214, align 4 > %216 = lshr i32 %7, 13 > %217 = and i32 %216, 255 > %218 = and i32 %7, 8191 > %219 = and i32 %10, 255 > %220 = mul nuw nsw i32 %218, %219 > %221 = add nuw nsw i32 %220, %217 > %222 = add nuw nsw i32 %221, 17 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %223 > %225 = bitcast i32 addrspace(3)* %224 to float addrspace(3)* > %226 = load float, float addrspace(3)* %225, align 4 > %227 = lshr i32 %7, 13 > %228 = and i32 %227, 255 > %229 = and i32 %7, 8191 > %230 = and i32 %10, 255 > %231 = mul nuw nsw i32 %229, %230 > %232 = add nuw nsw i32 %231, %228 > %233 = add nuw nsw i32 %232, 18 > %234 = zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %13, %215 > %239 = fmul float %14, %226 > %240 = fadd float %238, %239 > %241 = fmul float %15, %237 > %242 = fadd float %240, %241 > %243 = fadd float %242, %16 > %244 = fmul float %17, %215 > %245 = fmul float %18, %226 > %246 = fadd float %244, %245 > %247 = fmul float %19, %237 > %248 = fadd float %246, %247 > %249 = fadd float %248, %20 > %250 = fmul float %21, %215 > %251 = fmul float %22, %226 > %252 = fadd float %250, %251 > %253 = fmul float %23, %237 > %254 = fadd float %252, %253 > %255 = fadd float %254, %24 > %256 = and i32 %7, 8191 > %257 = and i32 %10, 255 > %258 = mul nuw nsw i32 %256, %257 > %259 = lshr i32 %7, 12 > %260 = and i32 %259, 510 > %261 = add nuw nsw i32 %258, %260 > %262 = add nuw nsw i32 %261, 16 > %263 = zext i32 %262 to i64 > %264 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %263 > %265 = bitcast i32 addrspace(3)* %264 to float addrspace(3)* > %266 = load float, float addrspace(3)* %265, align 4 > %267 = and i32 %7, 8191 > %268 = and i32 %10, 255 > %269 = mul nuw nsw i32 %267, %268 > %270 = lshr i32 %7, 12 > %271 = and i32 %270, 510 > %272 = add nuw nsw i32 %269, %271 > %273 = add nuw nsw i32 %272, 17 > %274 = zext i32 %273 to i64 > %275 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %274 > %276 = bitcast i32 addrspace(3)* %275 to float addrspace(3)* > %277 = load float, float addrspace(3)* %276, align 4 > %278 = and i32 %7, 8191 > %279 = and i32 %10, 255 > %280 = mul nuw nsw i32 %278, %279 > %281 = lshr i32 %7, 12 > %282 = and i32 %281, 510 > %283 = add nuw nsw i32 %280, %282 > %284 = add nuw nsw i32 %283, 18 > %285 = zext i32 %284 to i64 > %286 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %285 > %287 = bitcast i32 addrspace(3)* %286 to float addrspace(3)* > %288 = load float, float addrspace(3)* %287, align 4 > %289 = fmul float %13, %266 > %290 = fmul float %14, %277 > %291 = fadd float %289, %290 > %292 = fmul float %15, %288 > %293 = fadd float %291, %292 > %294 = fadd float %293, %16 > %295 = fmul float %17, %266 > %296 = fmul float %18, %277 > %297 = fadd float %295, %296 > %298 = fmul float %19, %288 > %299 = fadd float %297, %298 > %300 = fadd float %299, %20 > %301 = fmul float %204, %28 > %302 = fmul float %255, %28 > %303 = fmul float %21, %266 > %304 = fmul float %22, %277 > %305 = fadd float %303, %304 > %306 = fmul float %23, %288 > %307 = fadd float %305, %306 > %308 = fadd float %307, %24 > %309 = fmul float %308, %28 > %310 = fsub float -0.000000e+00, %27 > %311 = fcmp olt float %192, %310 > %312 = zext i1 %311 to i32 > %313 = fsub float -0.000000e+00, %27 > %314 = fcmp olt float %243, %313 > %315 = fsub float -0.000000e+00, %27 > %316 = fcmp olt float %249, %315 > %317 = zext i1 %314 to i32 > %318 = zext i1 %316 to i32 > %319 = add nuw nsw i32 %317, %312 > %320 = fsub float -0.000000e+00, %27 > %321 = fcmp olt float %294, %320 > %322 = 
zext i1 %321 to i32 > %323 = add nuw nsw i32 %322, %319 > %324 = fsub float -0.000000e+00, %27 > %325 = fcmp olt float %198, %324 > %326 = zext i1 %325 to i32 > %327 = add nuw nsw i32 %318, %326 > %328 = fsub float -0.000000e+00, %27 > %329 = fcmp olt float %300, %328 > %330 = zext i1 %329 to i32 > %331 = add nuw nsw i32 %330, %327 > %332 = fcmp olt float %301, 0.000000e+00 > %333 = zext i1 %332 to i32 > %334 = fcmp olt float %302, 0.000000e+00 > %335 = zext i1 %334 to i32 > %336 = add nuw nsw i32 %333, %335 > %337 = fcmp olt float %309, 0.000000e+00 > %338 = zext i1 %337 to i32 > %339 = add nuw nsw i32 %336, %338 > %340 = fcmp olt float %27, %192 > %341 = zext i1 %340 to i32 > %342 = fcmp olt float %27, %243 > %343 = fcmp olt float %27, %249 > %344 = zext i1 %342 to i32 > %345 = zext i1 %343 to i32 > %346 = add nuw nsw i32 %344, %341 > %347 = fcmp olt float %27, %294 > %348 = zext i1 %347 to i32 > %349 = add nuw nsw i32 %346, %348 > %350 = fcmp olt float %27, %198 > %351 = zext i1 %350 to i32 > %352 = add nuw nsw i32 %345, %351 > %353 = fcmp olt float %27, %300 > %354 = zext i1 %353 to i32 > %355 = add nuw nsw i32 %352, %354 > %356 = fcmp olt float %27, %301 > %357 = fcmp olt float %27, %302 > %358 = zext i1 %356 to i32 > %359 = zext i1 %357 to i32 > %360 = add nuw nsw i32 %359, %358 > %361 = fcmp olt float %27, %309 > %362 = zext i1 %361 to i32 > %363 = add nuw nsw i32 %362, %360 > %364 = icmp eq i32 %323, 3 > %365 = sext i1 %364 to i32 > %366 = icmp eq i32 %349, 3 > %367 = icmp eq i32 %355, 3 > %368 = sext i1 %367 to i32 > %369 = icmp eq i32 %331, 3 > %370 = sext i1 %369 to i32 > %371 = select i1 %367, i32 -1, i32 %370 > %372 = select i1 %366, i32 -1, i32 %365 > %373 = or i32 %371, %372 > %374 = icmp eq i32 %339, 3 > %375 = icmp eq i32 %363, 3 > %376 = sext i1 %375 to i32 > %377 = select i1 %374, i32 -1, i32 %376 > %378 = or i32 %377, %373 > %379 = icmp eq i32 %378, 0 > br i1 %379, label %ELSE, label %ENDIF > >ELSE: ; preds = %main_body > %380 = lshr i32 %7, 13 > %381 = and i32 %380, 255 > %382 = and i32 %7, 8191 > %383 = and i32 %10, 255 > %384 = mul nuw nsw i32 %382, %383 > %385 = add nuw nsw i32 %384, %381 > %386 = add nuw nsw i32 %385, 16 > %387 = zext i32 %386 to i64 > %388 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %387 > %389 = bitcast i32 addrspace(3)* %388 to float addrspace(3)* > %390 = load float, float addrspace(3)* %389, align 4 > %391 = and i32 %7, 8191 > %392 = and i32 %10, 255 > %393 = mul nuw nsw i32 %391, %392 > %394 = add nuw nsw i32 %393, 16 > %395 = zext i32 %394 to i64 > %396 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %395 > %397 = bitcast i32 addrspace(3)* %396 to float addrspace(3)* > %398 = load float, float addrspace(3)* %397, align 4 > %399 = fsub float %398, %390 > %400 = lshr i32 %7, 13 > %401 = and i32 %400, 255 > %402 = and i32 %7, 8191 > %403 = and i32 %10, 255 > %404 = mul nuw nsw i32 %402, %403 > %405 = add nuw nsw i32 %404, %401 > %406 = add nuw nsw i32 %405, 17 > %407 = zext i32 %406 to i64 > %408 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %407 > %409 = bitcast i32 addrspace(3)* %408 to float addrspace(3)* > %410 = load float, float addrspace(3)* %409, align 4 > %411 = and i32 %7, 8191 > %412 = and i32 %10, 255 > %413 = mul nuw nsw i32 %411, %412 > %414 = add nuw nsw i32 %413, 17 > %415 = zext i32 %414 to i64 > %416 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %415 > %417 = bitcast i32 addrspace(3)* %416 to 
float addrspace(3)* > %418 = load float, float addrspace(3)* %417, align 4 > %419 = fsub float %418, %410 > %420 = lshr i32 %7, 13 > %421 = and i32 %420, 255 > %422 = and i32 %7, 8191 > %423 = and i32 %10, 255 > %424 = mul nuw nsw i32 %422, %423 > %425 = add nuw nsw i32 %424, %421 > %426 = add nuw nsw i32 %425, 18 > %427 = zext i32 %426 to i64 > %428 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %427 > %429 = bitcast i32 addrspace(3)* %428 to float addrspace(3)* > %430 = load float, float addrspace(3)* %429, align 4 > %431 = and i32 %7, 8191 > %432 = and i32 %10, 255 > %433 = mul nuw nsw i32 %431, %432 > %434 = add nuw nsw i32 %433, 18 > %435 = zext i32 %434 to i64 > %436 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %435 > %437 = bitcast i32 addrspace(3)* %436 to float addrspace(3)* > %438 = load float, float addrspace(3)* %437, align 4 > %439 = fsub float %438, %430 > %440 = fmul float %399, %399 > %441 = fmul float %419, %419 > %442 = fadd float %441, %440 > %443 = fmul float %439, %439 > %444 = fadd float %442, %443 > %445 = and i32 %7, 8191 > %446 = and i32 %10, 255 > %447 = mul nuw nsw i32 %445, %446 > %448 = lshr i32 %7, 12 > %449 = and i32 %448, 510 > %450 = add nuw nsw i32 %447, %449 > %451 = add nuw nsw i32 %450, 16 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > %454 = bitcast i32 addrspace(3)* %453 to float addrspace(3)* > %455 = load float, float addrspace(3)* %454, align 4 > %456 = lshr i32 %7, 13 > %457 = and i32 %456, 255 > %458 = and i32 %7, 8191 > %459 = and i32 %10, 255 > %460 = mul nuw nsw i32 %458, %459 > %461 = add nuw nsw i32 %460, %457 > %462 = add nuw nsw i32 %461, 16 > %463 = zext i32 %462 to i64 > %464 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %463 > %465 = bitcast i32 addrspace(3)* %464 to float addrspace(3)* > %466 = load float, float addrspace(3)* %465, align 4 > %467 = fsub float %466, %455 > %468 = and i32 %7, 8191 > %469 = and i32 %10, 255 > %470 = mul nuw nsw i32 %468, %469 > %471 = lshr i32 %7, 12 > %472 = and i32 %471, 510 > %473 = add nuw nsw i32 %470, %472 > %474 = add nuw nsw i32 %473, 17 > %475 = zext i32 %474 to i64 > %476 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %475 > %477 = bitcast i32 addrspace(3)* %476 to float addrspace(3)* > %478 = load float, float addrspace(3)* %477, align 4 > %479 = lshr i32 %7, 13 > %480 = and i32 %479, 255 > %481 = and i32 %7, 8191 > %482 = and i32 %10, 255 > %483 = mul nuw nsw i32 %481, %482 > %484 = add nuw nsw i32 %483, %480 > %485 = add nuw nsw i32 %484, 17 > %486 = zext i32 %485 to i64 > %487 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %486 > %488 = bitcast i32 addrspace(3)* %487 to float addrspace(3)* > %489 = load float, float addrspace(3)* %488, align 4 > %490 = fsub float %489, %478 > %491 = and i32 %7, 8191 > %492 = and i32 %10, 255 > %493 = mul nuw nsw i32 %491, %492 > %494 = lshr i32 %7, 12 > %495 = and i32 %494, 510 > %496 = add nuw nsw i32 %493, %495 > %497 = add nuw nsw i32 %496, 18 > %498 = zext i32 %497 to i64 > %499 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %498 > %500 = bitcast i32 addrspace(3)* %499 to float addrspace(3)* > %501 = load float, float addrspace(3)* %500, align 4 > %502 = lshr i32 %7, 13 > %503 = and i32 %502, 255 > %504 = and i32 %7, 8191 > %505 = and i32 %10, 255 > %506 = mul nuw nsw i32 %504, %505 > 
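
The address arithmetic that repeats throughout shader 330 — (%7 >> 13) & 255, %7 & 8191, %10 & 255, (%10 >> 8) & 31 — is bitfield decoding of values packed into two of the shader arguments, re-derived before every LDS access. The sketch below only restates the shifts and masks that are visible in the IR; the struct and field names are illustrative guesses, not taken from this log or from the radeonsi sources.

#include <stdint.h>
#include <stdio.h>

/* Two arguments (%7 and %10 in the dump) are treated as packed bitfields. */
struct packed_a { uint32_t low13, hi8; };      /* %7 & 0x1fff, (%7 >> 13) & 0xff */
struct packed_b { uint32_t low8, bits8_12; };  /* %10 & 0xff, (%10 >> 8) & 0x1f  */

static struct packed_a decode_a(uint32_t v)
{
    struct packed_a a = { v & 0x1fff, (v >> 13) & 0xff };
    return a;
}

static struct packed_b decode_b(uint32_t v)
{
    struct packed_b b = { v & 0xff, (v >> 8) & 0x1f };
    return b;
}

int main(void)
{
    /* Arbitrary example values, just to exercise the decode. */
    struct packed_a a = decode_a(0x00202004);
    struct packed_b b = decode_b(0x00000103);

    /* The first LDS dword index computed in main_body has the shape
     *   a.low13 * b.low8 + b.bits8_12 * a.hi8 + 16
     * and the later loads/stores only vary the trailing "+ 16 ... + 23". */
    printf("dword index = %u\n", a.low13 * b.low8 + b.bits8_12 * a.hi8 + 16);
    return 0;
}
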
%507 = add nuw nsw i32 %506, %503 > %508 = add nuw nsw i32 %507, 18 > %509 = zext i32 %508 to i64 > %510 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %509 > %511 = bitcast i32 addrspace(3)* %510 to float addrspace(3)* > %512 = load float, float addrspace(3)* %511, align 4 > %513 = fsub float %512, %501 > %514 = fmul float %467, %467 > %515 = fmul float %490, %490 > %516 = fadd float %515, %514 > %517 = fmul float %513, %513 > %518 = fadd float %516, %517 > %519 = call float @llvm.sqrt.f32(float %444) > %520 = call float @llvm.sqrt.f32(float %518) > %521 = and i32 %7, 8191 > %522 = and i32 %10, 255 > %523 = mul nuw nsw i32 %521, %522 > %524 = add nuw nsw i32 %523, 16 > %525 = zext i32 %524 to i64 > %526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %525 > %527 = bitcast i32 addrspace(3)* %526 to float addrspace(3)* > %528 = load float, float addrspace(3)* %527, align 4 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = lshr i32 %7, 12 > %533 = and i32 %532, 510 > %534 = add nuw nsw i32 %531, %533 > %535 = add nuw nsw i32 %534, 16 > %536 = zext i32 %535 to i64 > %537 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %536 > %538 = bitcast i32 addrspace(3)* %537 to float addrspace(3)* > %539 = load float, float addrspace(3)* %538, align 4 > %540 = fsub float %539, %528 > %541 = and i32 %7, 8191 > %542 = and i32 %10, 255 > %543 = mul nuw nsw i32 %541, %542 > %544 = add nuw nsw i32 %543, 17 > %545 = zext i32 %544 to i64 > %546 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %545 > %547 = bitcast i32 addrspace(3)* %546 to float addrspace(3)* > %548 = load float, float addrspace(3)* %547, align 4 > %549 = and i32 %7, 8191 > %550 = and i32 %10, 255 > %551 = mul nuw nsw i32 %549, %550 > %552 = lshr i32 %7, 12 > %553 = and i32 %552, 510 > %554 = add nuw nsw i32 %551, %553 > %555 = add nuw nsw i32 %554, 17 > %556 = zext i32 %555 to i64 > %557 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %556 > %558 = bitcast i32 addrspace(3)* %557 to float addrspace(3)* > %559 = load float, float addrspace(3)* %558, align 4 > %560 = fsub float %559, %548 > %561 = and i32 %7, 8191 > %562 = and i32 %10, 255 > %563 = mul nuw nsw i32 %561, %562 > %564 = add nuw nsw i32 %563, 18 > %565 = zext i32 %564 to i64 > %566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %565 > %567 = bitcast i32 addrspace(3)* %566 to float addrspace(3)* > %568 = load float, float addrspace(3)* %567, align 4 > %569 = and i32 %7, 8191 > %570 = and i32 %10, 255 > %571 = mul nuw nsw i32 %569, %570 > %572 = lshr i32 %7, 12 > %573 = and i32 %572, 510 > %574 = add nuw nsw i32 %571, %573 > %575 = add nuw nsw i32 %574, 18 > %576 = zext i32 %575 to i64 > %577 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %576 > %578 = bitcast i32 addrspace(3)* %577 to float addrspace(3)* > %579 = load float, float addrspace(3)* %578, align 4 > %580 = fsub float %579, %568 > %581 = fmul float %540, %540 > %582 = fmul float %560, %560 > %583 = fadd float %582, %581 > %584 = fmul float %580, %580 > %585 = fadd float %583, %584 > %586 = call float @llvm.sqrt.f32(float %585) > %587 = call float @llvm.minnum.f32(float %302, float %301) > %588 = call float @llvm.minnum.f32(float %302, float %309) > %589 = call float @llvm.minnum.f32(float %301, float %309) > %590 = fmul float %587, 0x3FD99999A0000000 > %591 = call 
float @llvm.maxnum.f32(float %590, float 1.000000e+02) > %592 = fmul float %589, 0x3FD99999A0000000 > %593 = fmul float %588, 0x3FD99999A0000000 > %594 = call float @llvm.maxnum.f32(float %592, float 1.000000e+02) > %595 = call float @llvm.maxnum.f32(float %593, float 1.000000e+02) > %596 = fcmp une float %591, 0.000000e+00 > br i1 %596, label %IF45, label %ELSE46 > >ENDIF: ; preds = %main_body, %ENDIF50 > %temp24.0 = phi i32 [ %phitmp57, %ENDIF50 ], [ 0, %main_body ] > %temp20.0 = phi i32 [ %phitmp56, %ENDIF50 ], [ 0, %main_body ] > %temp8.0 = phi i32 [ %phitmp55, %ENDIF50 ], [ 0, %main_body ] > %temp4.0 = phi i32 [ %phitmp, %ENDIF50 ], [ 0, %main_body ] > %597 = lshr i32 %5, 16 > %598 = shl nuw nsw i32 %597, 2 > %599 = and i32 %6, 8191 > %600 = and i32 %10, 255 > %601 = mul nuw nsw i32 %599, %600 > %602 = add nuw nsw i32 %598, %601 > %603 = add nuw nsw i32 %602, 8 > %604 = zext i32 %603 to i64 > %605 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %604 > store i32 %temp4.0, i32 addrspace(3)* %605, align 4 > %606 = add nuw nsw i32 %602, 9 > %607 = zext i32 %606 to i64 > %608 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %607 > store i32 %368, i32 addrspace(3)* %608, align 4 > %609 = add nuw nsw i32 %602, 10 > %610 = zext i32 %609 to i64 > %611 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %610 > %612 = bitcast i32 addrspace(3)* %611 to float addrspace(3)* > store float %15, float addrspace(3)* %612, align 4 > %613 = add nuw nsw i32 %602, 11 > %614 = zext i32 %613 to i64 > %615 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %614 > %616 = bitcast i32 addrspace(3)* %615 to float addrspace(3)* > store float %16, float addrspace(3)* %616, align 4 > %617 = lshr i32 %5, 16 > %618 = shl nuw nsw i32 %617, 2 > %619 = and i32 %6, 8191 > %620 = and i32 %10, 255 > %621 = mul nuw nsw i32 %619, %620 > %622 = add nuw nsw i32 %618, %621 > %623 = add nuw nsw i32 %622, 12 > %624 = zext i32 %623 to i64 > %625 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %624 > store i32 %temp8.0, i32 addrspace(3)* %625, align 4 > %626 = add nuw nsw i32 %622, 13 > %627 = zext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 > %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > store float %18, float addrspace(3)* %629, align 4 > %630 = add nuw nsw i32 %622, 14 > %631 = zext i32 %630 to i64 > %632 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %631 > %633 = bitcast i32 addrspace(3)* %632 to float addrspace(3)* > store float %19, float addrspace(3)* %633, align 4 > %634 = add nuw nsw i32 %622, 15 > %635 = zext i32 %634 to i64 > %636 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %635 > %637 = bitcast i32 addrspace(3)* %636 to float addrspace(3)* > store float %20, float addrspace(3)* %637, align 4 > %638 = lshr i32 %5, 16 > %639 = shl nuw nsw i32 %638, 2 > %640 = and i32 %6, 8191 > %641 = and i32 %10, 255 > %642 = mul nuw nsw i32 %640, %641 > %643 = add nuw nsw i32 %639, %642 > %644 = add nuw nsw i32 %643, 16 > %645 = zext i32 %644 to i64 > %646 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %645 > store i32 %temp20.0, i32 addrspace(3)* %646, align 4 > %647 = add nuw nsw i32 %643, 17 > %648 = zext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > 
%650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %14, float addrspace(3)* %650, align 4 > %651 = add nuw nsw i32 %643, 18 > %652 = zext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %15, float addrspace(3)* %654, align 4 > %655 = add nuw nsw i32 %643, 19 > %656 = zext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %16, float addrspace(3)* %658, align 4 > %659 = lshr i32 %5, 16 > %660 = shl nuw nsw i32 %659, 2 > %661 = and i32 %6, 8191 > %662 = and i32 %10, 255 > %663 = mul nuw nsw i32 %661, %662 > %664 = add nuw nsw i32 %660, %663 > %665 = add nuw nsw i32 %664, 20 > %666 = zext i32 %665 to i64 > %667 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %666 > store i32 %temp24.0, i32 addrspace(3)* %667, align 4 > %668 = add nuw nsw i32 %664, 21 > %669 = zext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > store float %18, float addrspace(3)* %671, align 4 > %672 = add nuw nsw i32 %664, 22 > %673 = zext i32 %672 to i64 > %674 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %673 > %675 = bitcast i32 addrspace(3)* %674 to float addrspace(3)* > store float %19, float addrspace(3)* %675, align 4 > %676 = add nuw nsw i32 %664, 23 > %677 = zext i32 %676 to i64 > %678 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %677 > %679 = bitcast i32 addrspace(3)* %678 to float addrspace(3)* > store float %20, float addrspace(3)* %679, align 4 > %680 = lshr i32 %5, 16 > %681 = shl nuw nsw i32 %680, 2 > %682 = and i32 %6, 8191 > %683 = and i32 %10, 255 > %684 = mul nuw nsw i32 %682, %683 > %685 = add nuw nsw i32 %681, %684 > %686 = zext i32 %685 to i64 > %687 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %686 > store i32 %temp4.0, i32 addrspace(3)* %687, align 4 > %688 = lshr i32 %5, 16 > %689 = shl nuw nsw i32 %688, 2 > %690 = and i32 %6, 8191 > %691 = and i32 %10, 255 > %692 = mul nuw nsw i32 %690, %691 > %693 = add nuw nsw i32 %689, %692 > %694 = add nuw nsw i32 %693, 1 > %695 = zext i32 %694 to i64 > %696 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %695 > store i32 %temp8.0, i32 addrspace(3)* %696, align 4 > %697 = lshr i32 %5, 16 > %698 = shl nuw nsw i32 %697, 2 > %699 = and i32 %6, 8191 > %700 = and i32 %10, 255 > %701 = mul nuw nsw i32 %699, %700 > %702 = add nuw nsw i32 %698, %701 > %703 = add nuw nsw i32 %702, 2 > %704 = zext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > store i32 %temp20.0, i32 addrspace(3)* %705, align 4 > %706 = lshr i32 %5, 16 > %707 = shl nuw nsw i32 %706, 2 > %708 = and i32 %6, 8191 > %709 = and i32 %10, 255 > %710 = mul nuw nsw i32 %708, %709 > %711 = add nuw nsw i32 %707, %710 > %712 = add nuw nsw i32 %711, 4 > %713 = zext i32 %712 to i64 > %714 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %713 > store i32 %temp24.0, i32 addrspace(3)* %714, align 4 > %715 = and i32 %10, 255 > %716 = lshr i32 %10, 8 > %717 = and i32 %716, 31 > %718 = lshr i32 %5, 16 > %719 = shl nuw nsw i32 %718, 2 > %720 = and i32 %6, 8191 > 
%721 = and i32 %10, 255 > %722 = mul nuw nsw i32 %720, %721 > %723 = add nuw nsw i32 %719, %722 > %724 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %725 = bitcast i64 %724 to <2 x i32> > %726 = extractelement <2 x i32> %725, i32 0 > %727 = extractelement <2 x i32> %725, i32 1 > %728 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %726, 0 > %729 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %728, i32 %727, 1 > %730 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %729, i32 %8, 13 > %731 = bitcast i32 %715 to float > %732 = bitcast i32 %717 to float > %733 = bitcast i32 %723 to float > %734 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %730, float %731, 14 > %735 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %734, float %732, 15 > %736 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %735, float %733, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %736 > >IF45: ; preds = %ELSE > %737 = fdiv float 1.000000e+00, %591 > %738 = fmul float %519, %737 > br label %ENDIF44 > >ELSE46: ; preds = %ELSE > %739 = fcmp ogt float %519, 0.000000e+00 > %740 = select i1 %739, float 1.000000e+00, float %519 > %741 = fcmp oge float %740, 0.000000e+00 > %.op = fmul float %740, 0x4600000000000000 > %742 = select i1 %741, float %.op, float 0xC600000000000000 > br label %ENDIF44 > >ENDIF44: ; preds = %ELSE46, %IF45 > %temp12.0 = phi float [ %738, %IF45 ], [ %742, %ELSE46 ] > %743 = call float @llvm.maxnum.f32(float %temp12.0, float 1.000000e+00) > %744 = fcmp une float %595, 0.000000e+00 > br i1 %744, label %IF48, label %ELSE49 > >IF48: ; preds = %ENDIF44 > %745 = fdiv float 1.000000e+00, %595 > %746 = fmul float %520, %745 > br label %ENDIF47 > >ELSE49: ; preds = %ENDIF44 > %747 = fcmp ogt float %520, 0.000000e+00 > %748 = select i1 %747, float 1.000000e+00, float %520 > %749 = fcmp oge float %748, 0.000000e+00 > %.op53 = fmul float %748, 0x4600000000000000 > %750 = select i1 %749, float %.op53, float 0xC600000000000000 > br label %ENDIF47 > >ENDIF47: ; preds = %ELSE49, %IF48 > %temp12.1 = phi float [ %746, %IF48 ], [ %750, %ELSE49 ] > %751 = call float @llvm.maxnum.f32(float %temp12.1, float 1.000000e+00) > %752 = fcmp une float %594, 0.000000e+00 > br i1 %752, label %IF51, label %ELSE52 > >IF51: ; preds = %ENDIF47 > %753 = fdiv float 1.000000e+00, %594 > %754 = fmul float %586, %753 > br label %ENDIF50 > >ELSE52: ; preds = %ENDIF47 > %755 = fcmp ogt float %586, 0.000000e+00 > %756 = select i1 %755, float 1.000000e+00, float %586 > %757 = fcmp oge float %756, 0.000000e+00 > %.op54 = fmul float %756, 0x4600000000000000 > %758 = select i1 %757, float %.op54, float 0xC600000000000000 > br label %ENDIF50 > >ENDIF50: ; preds = %ELSE52, %IF51 > %temp12.2 = phi float [ %754, %IF51 ], [ %758, %ELSE52 ] > %759 = call float @llvm.maxnum.f32(float %temp12.2, float 1.000000e+00) > %760 = call float @llvm.minnum.f32(float %759, float 6.300000e+01) > %761 = call float @llvm.minnum.f32(float %743, float 6.300000e+01) > %762 = call float @llvm.minnum.f32(float %751, float 6.300000e+01) > %763 = call float @llvm.maxnum.f32(float %761, float %760) > %764 = call float 
@llvm.maxnum.f32(float %763, float %762) > %phitmp = bitcast float %762 to i32 > %phitmp55 = bitcast float %760 to i32 > %phitmp56 = bitcast float %761 to i32 > %phitmp57 = bitcast float %764 to i32 > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..1], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..50] >DCL CONST[2][0..39] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[1] UINT32 {0, 768, 784, 800} >IMM[2] UINT32 {1, 624, 0, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[0].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[0].w, IMM[0].xxxx > 4: MOV TEMP[1], CONST[1][48] > 5: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 6: MOV TEMP[2], CONST[1][49] > 7: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 8: MOV TEMP[1].y, TEMP[2].xxxx > 9: MOV TEMP[2], CONST[1][50] > 10: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 11: MOV TEMP[1].z, TEMP[2].xxxx > 12: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 13: SQRT TEMP[2].x, TEMP[0].xxxx > 14: FSEQ TEMP[3].xyz, TEMP[2].xxxx, IMM[0].yyyy > 15: SSG TEMP[4].xyz, TEMP[1].xyzz > 16: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz > 17: RCP TEMP[5].xyz, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz > 19: UCMP TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[1].xyzz > 20: MOV TEMP[3].x, CONST[2][39] > 21: FSNE TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy > 22: UIF TEMP[3].xxxx :0 > 23: MOV TEMP[3].x, CONST[2][39] > 24: RCP TEMP[3].x, TEMP[3].xxxx > 25: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx > 26: ELSE :0 > 27: SSG TEMP[2].x, TEMP[2].xxxx > 28: MUL TEMP[3].x, IMM[0].zzzz, TEMP[2].xxxx > 29: ENDIF > 30: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[0].xxxx > 31: MOV TEMP[2].z, CONST[2][39] > 32: FMA TEMP[2].x, TEMP[1].zzzz, TEMP[2].zzzz, IMM[0].xxxx > 33: FSEQ TEMP[3].xy, TEMP[2].xxxx, IMM[0].yyyy > 34: SSG TEMP[4].xy, TEMP[1].xyyy > 35: MUL TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy > 36: RCP TEMP[2].xy, TEMP[2].xxxx > 37: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[2].xyyy > 38: UCMP TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 39: MOV TEMP[3].z, CONST[2][39] > 40: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[3].zzzz > 41: MOV TEMP[0].y, TEMP[1].xxxx > 42: MOV TEMP[2].z, TEMP[0].xxxx > 43: MOV TEMP[1].zw, TEMP[0].xxyx > 44: MOV TEMP[2].w, IMM[0].xxxx > 45: MUL TEMP[0].xy, SV[0].yyyy, IN[1][1].xyyy > 46: FMA TEMP[0].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[0].xyyy > 47: FMA TEMP[1].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[0].xyyy > 48: MOV OUT[1], TEMP[1] > 49: MOV OUT[0], TEMP[2] > 50: END >radeonsi: Compiling shader 331 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, 
float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 804) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 808) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 812) > %25 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 > %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 624) > %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 632) > %29 = fadd float %7, %8 > %30 = fsub float 1.000000e+00, %29 > %31 = lshr i32 %6, 13 > %32 = and i32 %31, 255 > %33 = shl i32 %5, 2 > %34 = and i32 %33, 262140 > %35 = and i32 %6, 8191 > %36 = mul i32 %35, %9 > %37 = add i32 %34, %36 > %38 = add i32 %37, %32 > %39 = add i32 %38, 16 > %40 = sext i32 %39 to i64 > %41 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %40 > %42 = bitcast i32 addrspace(3)* %41 to float addrspace(3)* > %43 = load float, float addrspace(3)* %42, align 4 > %44 = fmul float %43, %8 > %45 = lshr i32 %6, 13 > %46 = and i32 %45, 255 > %47 = shl i32 %5, 2 > %48 = and i32 %47, 262140 > %49 = and i32 %6, 8191 > %50 = mul i32 %49, %9 > %51 = add i32 %48, %50 > %52 = add i32 %51, %46 > %53 = add i32 %52, 17 > %54 = sext i32 %53 to i64 > %55 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %54 > %56 = bitcast i32 addrspace(3)* %55 to float addrspace(3)* > %57 = load float, float addrspace(3)* %56, align 4 > %58 = fmul float %57, %8 > %59 = lshr i32 %6, 13 > %60 = and i32 %59, 255 > %61 = shl i32 %5, 2 > %62 = and i32 %61, 262140 > %63 = and i32 %6, 8191 > %64 = mul i32 %63, %9 > %65 = add i32 %62, %64 > %66 = add i32 %65, %60 > %67 = add i32 %66, 18 > %68 = sext i32 %67 to i64 > %69 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %68 > %70 = bitcast i32 addrspace(3)* %69 to float addrspace(3)* > %71 = load float, float addrspace(3)* %70, align 4 > %72 = fmul float %71, %8 > %73 = shl i32 %5, 2 > %74 = and i32 %73, 262140 > %75 = and i32 %6, 8191 > %76 = mul i32 %75, %9 > %77 = add i32 %74, %76 > %78 = add i32 %77, 16 > %79 = sext i32 %78 to i64 > %80 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %79 > %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* > %82 = load float, float addrspace(3)* %81, align 4 > %83 = call float @llvm.fma.f32(float %7, float %82, float %44) > %84 = shl i32 %5, 2 > %85 = and i32 %84, 262140 > %86 = and i32 %6, 8191 > %87 = mul i32 %86, %9 > %88 = add i32 %85, %87 > %89 = add i32 
%88, 17 > %90 = sext i32 %89 to i64 > %91 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %90 > %92 = bitcast i32 addrspace(3)* %91 to float addrspace(3)* > %93 = load float, float addrspace(3)* %92, align 4 > %94 = call float @llvm.fma.f32(float %7, float %93, float %58) > %95 = shl i32 %5, 2 > %96 = and i32 %95, 262140 > %97 = and i32 %6, 8191 > %98 = mul i32 %97, %9 > %99 = add i32 %96, %98 > %100 = add i32 %99, 18 > %101 = sext i32 %100 to i64 > %102 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %101 > %103 = bitcast i32 addrspace(3)* %102 to float addrspace(3)* > %104 = load float, float addrspace(3)* %103, align 4 > %105 = call float @llvm.fma.f32(float %7, float %104, float %72) > %106 = shl i32 %5, 2 > %107 = and i32 %106, 262140 > %108 = and i32 %6, 8191 > %109 = mul i32 %108, %9 > %110 = add i32 %107, %109 > %111 = lshr i32 %6, 12 > %112 = and i32 %111, 510 > %113 = add i32 %110, %112 > %114 = add i32 %113, 16 > %115 = sext i32 %114 to i64 > %116 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %115 > %117 = bitcast i32 addrspace(3)* %116 to float addrspace(3)* > %118 = load float, float addrspace(3)* %117, align 4 > %119 = call float @llvm.fma.f32(float %30, float %118, float %83) > %120 = shl i32 %5, 2 > %121 = and i32 %120, 262140 > %122 = and i32 %6, 8191 > %123 = mul i32 %122, %9 > %124 = add i32 %121, %123 > %125 = lshr i32 %6, 12 > %126 = and i32 %125, 510 > %127 = add i32 %124, %126 > %128 = add i32 %127, 17 > %129 = sext i32 %128 to i64 > %130 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %129 > %131 = bitcast i32 addrspace(3)* %130 to float addrspace(3)* > %132 = load float, float addrspace(3)* %131, align 4 > %133 = call float @llvm.fma.f32(float %30, float %132, float %94) > %134 = shl i32 %5, 2 > %135 = and i32 %134, 262140 > %136 = and i32 %6, 8191 > %137 = mul i32 %136, %9 > %138 = add i32 %135, %137 > %139 = lshr i32 %6, 12 > %140 = and i32 %139, 510 > %141 = add i32 %138, %140 > %142 = add i32 %141, 18 > %143 = sext i32 %142 to i64 > %144 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %143 > %145 = bitcast i32 addrspace(3)* %144 to float addrspace(3)* > %146 = load float, float addrspace(3)* %145, align 4 > %147 = call float @llvm.fma.f32(float %30, float %146, float %105) > %148 = fmul float %13, %119 > %149 = fmul float %14, %133 > %150 = fadd float %148, %149 > %151 = fmul float %15, %147 > %152 = fadd float %150, %151 > %153 = fadd float %152, %16 > %154 = fmul float %17, %119 > %155 = fmul float %18, %133 > %156 = fadd float %154, %155 > %157 = fmul float %19, %147 > %158 = fadd float %156, %157 > %159 = fadd float %158, %20 > %160 = fmul float %21, %119 > %161 = fmul float %22, %133 > %162 = fadd float %160, %161 > %163 = fmul float %23, %147 > %164 = fadd float %162, %163 > %165 = fadd float %164, %24 > %166 = fmul float %153, %153 > %167 = fmul float %159, %159 > %168 = fadd float %167, %166 > %169 = fmul float %165, %165 > %170 = fadd float %168, %169 > %171 = call float @llvm.sqrt.f32(float %170) > %172 = fcmp oeq float %171, 0.000000e+00 > %173 = fcmp oeq float %171, 0.000000e+00 > %174 = fcmp oeq float %171, 0.000000e+00 > %175 = fcmp ogt float %153, 0.000000e+00 > %176 = select i1 %175, float 1.000000e+00, float %153 > %177 = fcmp oge float %176, 0.000000e+00 > %178 = fcmp ogt float %159, 0.000000e+00 > %179 = select i1 %178, float 1.000000e+00, float %159 > %180 = fcmp oge float %179, 
0.000000e+00 > %181 = fcmp ogt float %165, 0.000000e+00 > %182 = select i1 %181, float 1.000000e+00, float %165 > %183 = fcmp oge float %182, 0.000000e+00 > %.op = fmul float %176, 0x4600000000000000 > %184 = select i1 %177, float %.op, float 0xC600000000000000 > %.op24 = fmul float %179, 0x4600000000000000 > %185 = select i1 %180, float %.op24, float 0xC600000000000000 > %.op25 = fmul float %182, 0x4600000000000000 > %186 = select i1 %183, float %.op25, float 0xC600000000000000 > %187 = fdiv float 1.000000e+00, %171 > %188 = fmul float %153, %187 > %189 = fmul float %159, %187 > %190 = fmul float %165, %187 > %191 = select i1 %172, float %184, float %188 > %192 = select i1 %173, float %185, float %189 > %193 = select i1 %174, float %186, float %190 > %194 = fcmp une float %27, 0.000000e+00 > br i1 %194, label %IF, label %ELSE > >IF: ; preds = %main_body > %195 = fdiv float 1.000000e+00, %27 > %196 = fmul float %171, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fcmp ogt float %171, 0.000000e+00 > %198 = select i1 %197, float 1.000000e+00, float %171 > %199 = fcmp oge float %198, 0.000000e+00 > %.op26 = fmul float %198, 0x4600000000000000 > %200 = select i1 %199, float %.op26, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %196, %IF ], [ %200, %ELSE ] > %201 = fsub float 1.000000e+00, %temp12.0 > %202 = call float @llvm.fma.f32(float %193, float %28, float 1.000000e+00) > %203 = fcmp oeq float %202, 0.000000e+00 > %204 = fcmp oeq float %202, 0.000000e+00 > %205 = fcmp ogt float %191, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %191 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %192, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %192 > %210 = fcmp oge float %209, 0.000000e+00 > %.op27 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op27, float 0xC600000000000000 > %.op28 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op28, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %202 > %214 = fmul float %191, %213 > %215 = fmul float %192, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = fmul float %193, %28 > %219 = lshr i32 %6, 13 > %220 = and i32 %219, 255 > %221 = shl i32 %5, 2 > %222 = and i32 %221, 262140 > %223 = and i32 %6, 8191 > %224 = mul i32 %223, %9 > %225 = add i32 %222, %224 > %226 = add i32 %225, %220 > %227 = add i32 %226, 20 > %228 = sext i32 %227 to i64 > %229 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %228 > %230 = bitcast i32 addrspace(3)* %229 to float addrspace(3)* > %231 = load float, float addrspace(3)* %230, align 4 > %232 = fmul float %231, %8 > %233 = lshr i32 %6, 13 > %234 = and i32 %233, 255 > %235 = shl i32 %5, 2 > %236 = and i32 %235, 262140 > %237 = and i32 %6, 8191 > %238 = mul i32 %237, %9 > %239 = add i32 %236, %238 > %240 = add i32 %239, %234 > %241 = add i32 %240, 21 > %242 = sext i32 %241 to i64 > %243 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %242 > %244 = bitcast i32 addrspace(3)* %243 to float addrspace(3)* > %245 = load float, float addrspace(3)* %244, align 4 > %246 = fmul float %245, %8 > %247 = shl i32 %5, 2 > %248 = and i32 %247, 262140 > %249 = and i32 %6, 8191 > %250 = mul i32 %249, %9 > %251 = add i32 %248, %250 > %252 = add i32 %251, 20 > %253 = sext i32 %252 to i64 > %254 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %253 > %255 = bitcast i32 addrspace(3)* %254 to float addrspace(3)* > %256 = load float, float addrspace(3)* %255, align 4 > %257 = call float @llvm.fma.f32(float %7, float %256, float %232) > %258 = shl i32 %5, 2 > %259 = and i32 %258, 262140 > %260 = and i32 %6, 8191 > %261 = mul i32 %260, %9 > %262 = add i32 %259, %261 > %263 = add i32 %262, 21 > %264 = sext i32 %263 to i64 > %265 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %264 > %266 = bitcast i32 addrspace(3)* %265 to float addrspace(3)* > %267 = load float, float addrspace(3)* %266, align 4 > %268 = call float @llvm.fma.f32(float %7, float %267, float %246) > %269 = shl i32 %5, 2 > %270 = and i32 %269, 262140 > %271 = and i32 %6, 8191 > %272 = mul i32 %271, %9 > %273 = add i32 %270, %272 > %274 = lshr i32 %6, 12 > %275 = and i32 %274, 510 > %276 = add i32 %273, %275 > %277 = add i32 %276, 20 > %278 = sext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = bitcast i32 addrspace(3)* %279 to float addrspace(3)* > %281 = load float, float addrspace(3)* %280, align 4 > %282 = call float @llvm.fma.f32(float %30, float %281, float %257) > %283 = shl i32 %5, 2 > %284 = and i32 %283, 262140 > %285 = and i32 %6, 8191 > %286 = mul i32 %285, %9 > %287 = add i32 %284, %286 > %288 = lshr i32 %6, 12 > %289 = and i32 %288, 510 > %290 = add i32 %287, %289 > %291 = add i32 %290, 21 > %292 = sext i32 %291 to i64 > %293 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %292 > %294 = bitcast i32 addrspace(3)* %293 to float addrspace(3)* > %295 = load float, float addrspace(3)* %294, align 4 > %296 = call float @llvm.fma.f32(float %30, float %295, float %268) > %297 = bitcast i32 %10 to float > %298 = insertvalue <{ float, float, float }> undef, float %297, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %282, float %296, float %218, float %201) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %216, float %217, float %201, float 1.000000e+00) > ret <{ float, float, float }> %298 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 1 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 3 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], GENERIC[0] >DCL OUT[1], GENERIC[1] >DCL OUT[2], GENERIC[2] >DCL OUT[3], GENERIC[3] >DCL OUT[4], GENERIC[4] >DCL OUT[5], GENERIC[5] >DCL CONST[1][0..10] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 112} >IMM[3] UINT32 {128, 144, 160, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, 
IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR 
TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, 
IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, 
CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy 
>322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][7], TEMP[18] >356: DP4 TEMP[5].x, CONST[1][8], TEMP[18] >357: MOV TEMP[3].y, TEMP[5].xxxx >358: DP4 TEMP[5].x, CONST[1][9], TEMP[18] >359: MOV TEMP[3].z, TEMP[5].xxxx >360: MOV TEMP[5].xy, IN[2].xyxx >361: MUL TEMP[1].xyz, IN[5].wwww, IN[5].xyzz >362: MOV TEMP[1].w, IN[5].wwww >363: MUL TEMP[6], TEMP[1], CONST[1][10] >364: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >365: MOV TEMP[1].y, TEMP[8].xxxx >366: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >367: MOV TEMP[1].z, TEMP[8].xxxx >368: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >369: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >370: RSQ TEMP[8].x, TEMP[8].xxxx >371: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >372: DP3 TEMP[8].x, CONST[1][7].xyzz, TEMP[1].xyzz >373: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >374: MOV TEMP[2].y, TEMP[9].xxxx >375: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >376: MOV TEMP[4].y, TEMP[9].xxxx >377: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >378: MOV TEMP[2].z, TEMP[9].xxxx >379: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >380: MOV TEMP[4].z, TEMP[7].xxxx >381: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >382: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >383: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >384: RSQ TEMP[7].x, TEMP[0].xxxx >385: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >386: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >387: MOV TEMP[8].y, TEMP[7].xxxx >388: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >389: RSQ TEMP[7].x, TEMP[7].xxxx >390: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >391: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >392: MOV TEMP[8].z, TEMP[4].xxxx >393: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >394: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >395: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >396: MOV TEMP[4].y, TEMP[7].xxxx >397: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[0].xyzz >398: MOV TEMP[1].y, TEMP[0].xxxx >399: DP3 TEMP[0].x, CONST[1][8].xyzz, TEMP[2].xyzz >400: MOV TEMP[4].z, TEMP[0].xxxx >401: DP3 TEMP[0].x, CONST[1][9].xyzz, TEMP[2].xyzz >402: MOV TEMP[1].z, TEMP[0].xxxx >403: MOV OUT[5], TEMP[1] >404: MOV OUT[4], TEMP[4] >405: MOV OUT[3], TEMP[8] >406: MOV 
OUT[2], TEMP[6] >407: MOV OUT[1], TEMP[5] >408: MOV OUT[0], TEMP[3] >409: END >radeonsi: Compiling shader 332 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs void @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 124) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 156) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %15) > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, 
i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %19) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %20) > %84 = extractelement <4 x float> %83, i32 0 > %85 = extractelement <4 x float> %83, i32 1 > %86 = extractelement <4 x float> %83, i32 2 > %87 = fmul float %86, 0x406FE01000000000 > %88 = fmul float %85, 0x406FE01000000000 > %89 = fmul float %84, 0x406FE01000000000 > %90 = fptosi float %87 to i32 > %91 = fptosi float %88 to i32 > %92 = fptosi float %89 to i32 > %93 = shl i32 %90, 1 > %94 = or i32 %93, 1 > %95 = shl i32 %91, 1 > %96 = or i32 %95, 1 > %97 = shl i32 %92, 1 > %98 = or i32 %97, 1 > %99 = shl i32 %90, 5 > %100 = or i32 %99, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %100) > %102 = fmul float %78, %101 > %103 = shl i32 %91, 5 > %104 = or i32 %103, 4 > %105 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %104) > %106 = fmul float %79, %105 > %107 = shl i32 %94, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %107) > %109 = shl i32 %94, 4 > %110 = or i32 %109, 12 > %111 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %110) > %112 = fmul float %108, %111 > %113 = shl i32 %94, 4 > %114 = or i32 %113, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %114) > %116 = shl i32 %94, 4 > %117 = or i32 %116, 8 > %118 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %117) > %119 = fsub float -0.000000e+00, %112 > %120 = call float @llvm.fma.f32(float %115, float %118, float %119) > %121 = shl i32 %94, 4 > %122 = or i32 %121, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %122) > %124 = shl i32 %94, 4 > %125 = or i32 %124, 8 > %126 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %125) > %127 = call float @llvm.fma.f32(float %123, float %126, float %112) > %128 = fmul float %127, %78 > %129 = fmul float %120, %78 > %130 = fmul float %129, 2.000000e+00 > %131 = shl i32 %96, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %131) > %133 = shl i32 %96, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %134) > %136 = fmul float %132, %135 > %137 = shl i32 %96, 4 > %138 = or i32 %137, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %138) > %140 = shl i32 %96, 4 > %141 = or i32 %140, 8 > %142 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %141) > %143 = fsub float -0.000000e+00, %136 > %144 = call float @llvm.fma.f32(float %139, float %142, float %143) > %145 = shl i32 %96, 4 > %146 = or i32 
%145, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %146) > %148 = shl i32 %96, 4 > %149 = or i32 %148, 8 > %150 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %149) > %151 = call float @llvm.fma.f32(float %147, float %150, float %136) > %152 = fmul float %151, %79 > %153 = fmul float %152, 2.000000e+00 > %154 = fmul float %144, %79 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %94, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %157) > %159 = shl i32 %94, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %160) > %162 = shl i32 %94, 4 > %163 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %162) > %164 = shl i32 %94, 4 > %165 = or i32 %164, 12 > %166 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %165) > %167 = fmul float %161, %166 > %168 = fmul float %161, %163 > %169 = fmul float %158, %166 > %170 = shl i32 %94, 4 > %171 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %170) > %172 = shl i32 %94, 4 > %173 = or i32 %172, 4 > %174 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %173) > %175 = call float @llvm.fma.f32(float %171, float %174, float %167) > %176 = fmul float %175, %78 > %177 = fmul float %176, 2.000000e+00 > %178 = shl i32 %94, 4 > %179 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %178) > %180 = shl i32 %94, 4 > %181 = or i32 %180, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %181) > %183 = shl i32 %94, 4 > %184 = or i32 %183, 8 > %185 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %184) > %186 = shl i32 %94, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %186) > %188 = shl i32 %94, 4 > %189 = or i32 %188, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %189) > %191 = shl i32 %94, 4 > %192 = or i32 %191, 8 > %193 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %192) > %194 = fmul float %179, %187 > %195 = fmul float %182, %190 > %196 = fmul float %185, %193 > %197 = fadd float %196, %195 > %198 = fadd float %196, %194 > %199 = fadd float %195, %194 > %200 = fsub float -0.000000e+00, %197 > %201 = call float @llvm.fma.f32(float %200, float 2.000000e+00, float 1.000000e+00) > %202 = fsub float -0.000000e+00, %198 > %203 = call float @llvm.fma.f32(float %202, float 2.000000e+00, float 1.000000e+00) > %204 = fsub float -0.000000e+00, %199 > %205 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float 1.000000e+00) > %206 = fmul float %78, %203 > %207 = shl i32 %96, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %208) > %210 = shl i32 %96, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %211) > %213 = shl i32 %96, 4 > %214 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %213) > %215 = shl i32 %96, 4 > %216 = or i32 %215, 12 > %217 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %216) > %218 = fmul float %212, %217 > %219 = fmul float %212, %214 > %220 = fmul float %209, %217 > %221 = shl i32 %96, 4 > %222 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %221) > %223 = shl i32 %96, 4 > %224 = or i32 %223, 4 > %225 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %224) > %226 = call float @llvm.fma.f32(float %222, float %225, float %218) > %227 = fmul float %226, %79 > %228 = fmul float %227, 2.000000e+00 > %229 = shl i32 %96, 4 > %230 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %229) > %231 = shl i32 %96, 4 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %232) > %234 = 
shl i32 %96, 4 > %235 = or i32 %234, 8 > %236 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %235) > %237 = shl i32 %96, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %237) > %239 = shl i32 %96, 4 > %240 = or i32 %239, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %240) > %242 = shl i32 %96, 4 > %243 = or i32 %242, 8 > %244 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %243) > %245 = fmul float %230, %238 > %246 = fmul float %233, %241 > %247 = fmul float %236, %244 > %248 = fadd float %247, %246 > %249 = fadd float %247, %245 > %250 = fadd float %246, %245 > %251 = fsub float -0.000000e+00, %248 > %252 = call float @llvm.fma.f32(float %251, float 2.000000e+00, float 1.000000e+00) > %253 = fsub float -0.000000e+00, %249 > %254 = call float @llvm.fma.f32(float %253, float 2.000000e+00, float 1.000000e+00) > %255 = fsub float -0.000000e+00, %250 > %256 = call float @llvm.fma.f32(float %255, float 2.000000e+00, float 1.000000e+00) > %257 = fmul float %79, %254 > %258 = fadd float %177, %228 > %259 = fadd float %206, %257 > %260 = fadd float %130, %155 > %261 = fadd float %102, %106 > %262 = shl i32 %92, 5 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %263) > %265 = fmul float %80, %264 > %266 = shl i32 %98, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %266) > %268 = shl i32 %98, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %269) > %271 = fmul float %267, %270 > %272 = shl i32 %98, 4 > %273 = or i32 %272, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %273) > %275 = shl i32 %98, 4 > %276 = or i32 %275, 8 > %277 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %276) > %278 = fsub float -0.000000e+00, %271 > %279 = call float @llvm.fma.f32(float %274, float %277, float %278) > %280 = shl i32 %98, 4 > %281 = or i32 %280, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %281) > %283 = shl i32 %98, 4 > %284 = or i32 %283, 8 > %285 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %284) > %286 = call float @llvm.fma.f32(float %282, float %285, float %271) > %287 = fmul float %286, %80 > %288 = fmul float %287, 2.000000e+00 > %289 = fmul float %279, %80 > %290 = fmul float %289, 2.000000e+00 > %291 = shl i32 %98, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %292) > %294 = shl i32 %98, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %295) > %297 = shl i32 %98, 4 > %298 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %297) > %299 = shl i32 %98, 4 > %300 = or i32 %299, 12 > %301 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %300) > %302 = fmul float %296, %301 > %303 = fmul float %296, %298 > %304 = fmul float %293, %301 > %305 = shl i32 %98, 4 > %306 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %305) > %307 = shl i32 %98, 4 > %308 = or i32 %307, 4 > %309 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %308) > %310 = call float @llvm.fma.f32(float %306, float %309, float %302) > %311 = fmul float %310, %80 > %312 = fmul float %311, 2.000000e+00 > %313 = shl i32 %98, 4 > %314 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %313) > %315 = shl i32 %98, 4 > %316 = or i32 %315, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %316) > %318 = shl i32 %98, 4 > %319 = or i32 %318, 8 > %320 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %319) > %321 = shl i32 %98, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %321) > 
%323 = shl i32 %98, 4 > %324 = or i32 %323, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %324) > %326 = shl i32 %98, 4 > %327 = or i32 %326, 8 > %328 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %327) > %329 = fmul float %314, %322 > %330 = fmul float %317, %325 > %331 = fmul float %320, %328 > %332 = fadd float %331, %330 > %333 = fadd float %331, %329 > %334 = fadd float %330, %329 > %335 = fsub float -0.000000e+00, %332 > %336 = call float @llvm.fma.f32(float %335, float 2.000000e+00, float 1.000000e+00) > %337 = fsub float -0.000000e+00, %333 > %338 = call float @llvm.fma.f32(float %337, float 2.000000e+00, float 1.000000e+00) > %339 = fsub float -0.000000e+00, %334 > %340 = call float @llvm.fma.f32(float %339, float 2.000000e+00, float 1.000000e+00) > %341 = fmul float %80, %338 > %342 = fadd float %258, %312 > %343 = fadd float %259, %341 > %344 = fadd float %260, %290 > %345 = fadd float %261, %265 > %346 = fmul float %342, %44 > %347 = fmul float %343, %45 > %348 = fadd float %346, %347 > %349 = fmul float %344, %46 > %350 = fadd float %348, %349 > %351 = fadd float %350, %345 > %352 = shl i32 %94, 4 > %353 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %352) > %354 = shl i32 %94, 4 > %355 = or i32 %354, 8 > %356 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %355) > %357 = fsub float -0.000000e+00, %169 > %358 = call float @llvm.fma.f32(float %353, float %356, float %357) > %359 = fmul float %358, %78 > %360 = fmul float %359, 2.000000e+00 > %361 = fmul float %128, 2.000000e+00 > %362 = shl i32 %96, 4 > %363 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %362) > %364 = shl i32 %96, 4 > %365 = or i32 %364, 8 > %366 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %365) > %367 = fsub float -0.000000e+00, %220 > %368 = call float @llvm.fma.f32(float %363, float %366, float %367) > %369 = fmul float %368, %79 > %370 = fmul float %369, 2.000000e+00 > %371 = fmul float %78, %205 > %372 = fmul float %78, %201 > %373 = fmul float %79, %256 > %374 = fmul float %79, %252 > %375 = shl i32 %90, 5 > %376 = or i32 %375, 8 > %377 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %376) > %378 = fmul float %78, %377 > %379 = shl i32 %91, 5 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %380) > %382 = fmul float %79, %381 > %383 = fadd float %370, %360 > %384 = fadd float %153, %361 > %385 = fadd float %373, %371 > %386 = fadd float %382, %378 > %387 = shl i32 %98, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %387) > %389 = shl i32 %98, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %390) > %392 = fsub float -0.000000e+00, %304 > %393 = call float @llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %80 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %80, %340 > %397 = fmul float %80, %336 > %398 = shl i32 %92, 5 > %399 = or i32 %398, 8 > %400 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %399) > %401 = fmul float %80, %400 > %402 = fadd float %383, %395 > %403 = fadd float %384, %288 > %404 = fadd float %385, %396 > %405 = fadd float %386, %401 > %406 = fmul float %402, %44 > %407 = fmul float %403, %45 > %408 = fadd float %406, %407 > %409 = fmul float %404, %46 > %410 = fadd float %408, %409 > %411 = fadd float %410, %405 > %412 = shl i32 %90, 5 > %413 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %412) > %414 = fmul float %78, %413 > %415 = shl i32 %91, 5 > %416 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %415) 
> %417 = fmul float %79, %416 > %418 = shl i32 %92, 5 > %419 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %418) > %420 = fmul float %80, %419 > %421 = shl i32 %94, 4 > %422 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %421) > %423 = shl i32 %94, 4 > %424 = or i32 %423, 4 > %425 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %424) > %426 = fsub float -0.000000e+00, %167 > %427 = call float @llvm.fma.f32(float %422, float %425, float %426) > %428 = fadd float %169, %168 > %429 = fmul float %427, %78 > %430 = fmul float %428, %78 > %431 = fmul float %429, 2.000000e+00 > %432 = fmul float %430, 2.000000e+00 > %433 = shl i32 %96, 4 > %434 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %433) > %435 = shl i32 %96, 4 > %436 = or i32 %435, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %436) > %438 = fsub float -0.000000e+00, %218 > %439 = call float @llvm.fma.f32(float %434, float %437, float %438) > %440 = shl i32 %98, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %440) > %442 = shl i32 %98, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %40, i32 %443) > %445 = fsub float -0.000000e+00, %302 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = fadd float %304, %303 > %448 = fmul float %439, %79 > %449 = fmul float %446, %80 > %450 = fmul float %447, %80 > %451 = fmul float %449, 2.000000e+00 > %452 = fmul float %450, 2.000000e+00 > %453 = fadd float %220, %219 > %454 = fmul float %453, %79 > %455 = fmul float %448, 2.000000e+00 > %456 = fmul float %454, 2.000000e+00 > %457 = fadd float %372, %374 > %458 = fadd float %431, %455 > %459 = fadd float %432, %456 > %460 = fadd float %414, %417 > %461 = fadd float %397, %457 > %462 = fadd float %451, %458 > %463 = fadd float %452, %459 > %464 = fadd float %420, %460 > %465 = fmul float %461, %44 > %466 = fmul float %462, %45 > %467 = fadd float %465, %466 > %468 = fmul float %463, %46 > %469 = fadd float %467, %468 > %470 = fadd float %469, %464 > %471 = fmul float %23, %470 > %472 = fmul float %24, %351 > %473 = fadd float %471, %472 > %474 = fmul float %25, %411 > %475 = fadd float %473, %474 > %476 = fadd float %475, %26 > %477 = fmul float %27, %470 > %478 = fmul float %28, %351 > %479 = fadd float %477, %478 > %480 = fmul float %29, %411 > %481 = fadd float %479, %480 > %482 = fadd float %481, %30 > %483 = fmul float %31, %470 > %484 = fmul float %32, %351 > %485 = fadd float %483, %484 > %486 = fmul float %33, %411 > %487 = fadd float %485, %486 > %488 = fadd float %487, %34 > %489 = fmul float %74, %71 > %490 = fmul float %74, %72 > %491 = fmul float %74, %73 > %492 = fmul float %489, %35 > %493 = fmul float %490, %36 > %494 = fmul float %491, %37 > %495 = fmul float %74, %38 > %496 = fmul float %342, %59 > %497 = fmul float %343, %60 > %498 = fadd float %497, %496 > %499 = fmul float %344, %61 > %500 = fadd float %498, %499 > %501 = fmul float %402, %59 > %502 = fmul float %403, %60 > %503 = fadd float %502, %501 > %504 = fmul float %404, %61 > %505 = fadd float %503, %504 > %506 = fmul float %461, %59 > %507 = fmul float %462, %60 > %508 = fadd float %507, %506 > %509 = fmul float %463, %61 > %510 = fadd float %508, %509 > %511 = fmul float %510, %510 > %512 = fmul float %500, %500 > %513 = fadd float %512, %511 > %514 = fmul float %505, %505 > %515 = fadd float %513, %514 > %516 = call float @llvm.AMDGPU.rsq.clamped.f32(float %515) > %517 = fmul float %516, %510 > %518 = fmul float %516, %500 > %519 = fmul float %516, %505 > %520 = fmul 
float %23, %517 > %521 = fmul float %24, %518 > %522 = fadd float %521, %520 > %523 = fmul float %25, %519 > %524 = fadd float %522, %523 > %525 = fmul float %342, %65 > %526 = fmul float %343, %66 > %527 = fadd float %526, %525 > %528 = fmul float %344, %67 > %529 = fadd float %527, %528 > %530 = fmul float %342, %50 > %531 = fmul float %343, %51 > %532 = fadd float %531, %530 > %533 = fmul float %344, %52 > %534 = fadd float %532, %533 > %535 = fmul float %402, %65 > %536 = fmul float %403, %66 > %537 = fadd float %536, %535 > %538 = fmul float %404, %67 > %539 = fadd float %537, %538 > %540 = fmul float %402, %50 > %541 = fmul float %403, %51 > %542 = fadd float %541, %540 > %543 = fmul float %404, %52 > %544 = fadd float %542, %543 > %545 = fmul float %461, %65 > %546 = fmul float %462, %66 > %547 = fadd float %546, %545 > %548 = fmul float %463, %67 > %549 = fadd float %547, %548 > %550 = fmul float %461, %50 > %551 = fmul float %462, %51 > %552 = fadd float %551, %550 > %553 = fmul float %463, %52 > %554 = fadd float %552, %553 > %555 = fmul float %549, %549 > %556 = fmul float %529, %529 > %557 = fadd float %556, %555 > %558 = fmul float %539, %539 > %559 = fadd float %557, %558 > %560 = call float @llvm.AMDGPU.rsq.clamped.f32(float %559) > %561 = fmul float %560, %549 > %562 = fmul float %560, %529 > %563 = fmul float %560, %539 > %564 = fmul float %23, %561 > %565 = fmul float %24, %562 > %566 = fadd float %565, %564 > %567 = fmul float %25, %563 > %568 = fadd float %566, %567 > %569 = fmul float %554, %554 > %570 = fmul float %534, %534 > %571 = fadd float %570, %569 > %572 = fmul float %544, %544 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %574, %554 > %576 = fmul float %574, %534 > %577 = fmul float %574, %544 > %578 = fmul float %23, %575 > %579 = fmul float %24, %576 > %580 = fadd float %579, %578 > %581 = fmul float %25, %577 > %582 = fadd float %580, %581 > %583 = fmul float %27, %517 > %584 = fmul float %28, %518 > %585 = fadd float %584, %583 > %586 = fmul float %29, %519 > %587 = fadd float %585, %586 > %588 = fmul float %31, %517 > %589 = fmul float %32, %518 > %590 = fadd float %589, %588 > %591 = fmul float %33, %519 > %592 = fadd float %590, %591 > %593 = fmul float %27, %561 > %594 = fmul float %28, %562 > %595 = fadd float %594, %593 > %596 = fmul float %29, %563 > %597 = fadd float %595, %596 > %598 = fmul float %31, %561 > %599 = fmul float %32, %562 > %600 = fadd float %599, %598 > %601 = fmul float %33, %563 > %602 = fadd float %600, %601 > %603 = fmul float %27, %575 > %604 = fmul float %28, %576 > %605 = fadd float %604, %603 > %606 = fmul float %29, %577 > %607 = fadd float %605, %606 > %608 = fmul float %31, %575 > %609 = fmul float %32, %576 > %610 = fadd float %609, %608 > %611 = fmul float %33, %577 > %612 = fadd float %610, %611 > %613 = lshr i32 %8, 13 > %614 = and i32 %613, 255 > %615 = mul i32 %614, %10 > %616 = add i32 %615, 16 > %617 = sext i32 %616 to i64 > %618 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %617 > %619 = bitcast i32 addrspace(3)* %618 to float addrspace(3)* > store float %476, float addrspace(3)* %619, align 4 > %620 = add i32 %615, 17 > %621 = sext i32 %620 to i64 > %622 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %621 > %623 = bitcast i32 addrspace(3)* %622 to float addrspace(3)* > store float %482, float addrspace(3)* %623, align 4 > %624 = add i32 %615, 18 > %625 = sext i32 %624 to i64 > %626 
= getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %625 > %627 = bitcast i32 addrspace(3)* %626 to float addrspace(3)* > store float %488, float addrspace(3)* %627, align 4 > %628 = add i32 %615, 20 > %bc = bitcast <4 x float> %55 to <4 x i32> > %629 = extractelement <4 x i32> %bc, i32 0 > %630 = sext i32 %628 to i64 > %631 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %630 > store i32 %629, i32 addrspace(3)* %631, align 4 > %632 = add i32 %615, 21 > %bc162 = bitcast <4 x float> %55 to <4 x i32> > %633 = extractelement <4 x i32> %bc162, i32 1 > %634 = sext i32 %632 to i64 > %635 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %634 > store i32 %633, i32 addrspace(3)* %635, align 4 > %636 = add i32 %615, 22 > %637 = sext i32 %636 to i64 > %638 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %637 > %639 = bitcast i32 addrspace(3)* %638 to float addrspace(3)* > store float %46, float addrspace(3)* %639, align 4 > %640 = add i32 %615, 23 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > store i32 1065353216, i32 addrspace(3)* %642, align 4 > %643 = add i32 %615, 24 > %644 = sext i32 %643 to i64 > %645 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %644 > %646 = bitcast i32 addrspace(3)* %645 to float addrspace(3)* > store float %492, float addrspace(3)* %646, align 4 > %647 = add i32 %615, 25 > %648 = sext i32 %647 to i64 > %649 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %648 > %650 = bitcast i32 addrspace(3)* %649 to float addrspace(3)* > store float %493, float addrspace(3)* %650, align 4 > %651 = add i32 %615, 26 > %652 = sext i32 %651 to i64 > %653 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %652 > %654 = bitcast i32 addrspace(3)* %653 to float addrspace(3)* > store float %494, float addrspace(3)* %654, align 4 > %655 = add i32 %615, 27 > %656 = sext i32 %655 to i64 > %657 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %656 > %658 = bitcast i32 addrspace(3)* %657 to float addrspace(3)* > store float %495, float addrspace(3)* %658, align 4 > %659 = add i32 %615, 28 > %660 = sext i32 %659 to i64 > %661 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %660 > %662 = bitcast i32 addrspace(3)* %661 to float addrspace(3)* > store float %524, float addrspace(3)* %662, align 4 > %663 = add i32 %615, 29 > %664 = sext i32 %663 to i64 > %665 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %664 > %666 = bitcast i32 addrspace(3)* %665 to float addrspace(3)* > store float %568, float addrspace(3)* %666, align 4 > %667 = add i32 %615, 30 > %668 = sext i32 %667 to i64 > %669 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %668 > %670 = bitcast i32 addrspace(3)* %669 to float addrspace(3)* > store float %582, float addrspace(3)* %670, align 4 > %671 = add i32 %615, 31 > %672 = sext i32 %671 to i64 > %673 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %672 > %674 = bitcast i32 addrspace(3)* %673 to float addrspace(3)* > store float %135, float addrspace(3)* %674, align 4 > %675 = add i32 %615, 32 > %676 = sext i32 %675 to i64 > %677 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %676 > %678 = bitcast i32 addrspace(3)* %677 to float 
addrspace(3)* > store float %587, float addrspace(3)* %678, align 4 > %679 = add i32 %615, 33 > %680 = sext i32 %679 to i64 > %681 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %680 > %682 = bitcast i32 addrspace(3)* %681 to float addrspace(3)* > store float %597, float addrspace(3)* %682, align 4 > %683 = add i32 %615, 34 > %684 = sext i32 %683 to i64 > %685 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %684 > %686 = bitcast i32 addrspace(3)* %685 to float addrspace(3)* > store float %607, float addrspace(3)* %686, align 4 > %687 = add i32 %615, 35 > %688 = sext i32 %687 to i64 > %689 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %688 > %690 = bitcast i32 addrspace(3)* %689 to float addrspace(3)* > store float %345, float addrspace(3)* %690, align 4 > %691 = add i32 %615, 36 > %692 = sext i32 %691 to i64 > %693 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %692 > %694 = bitcast i32 addrspace(3)* %693 to float addrspace(3)* > store float %592, float addrspace(3)* %694, align 4 > %695 = add i32 %615, 37 > %696 = sext i32 %695 to i64 > %697 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %696 > %698 = bitcast i32 addrspace(3)* %697 to float addrspace(3)* > store float %602, float addrspace(3)* %698, align 4 > %699 = add i32 %615, 38 > %700 = sext i32 %699 to i64 > %701 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %700 > %702 = bitcast i32 addrspace(3)* %701 to float addrspace(3)* > store float %612, float addrspace(3)* %702, align 4 > %703 = add i32 %615, 39 > %704 = sext i32 %703 to i64 > %705 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %704 > %706 = bitcast i32 addrspace(3)* %705 to float addrspace(3)* > store float %74, float addrspace(3)* %706, align 4 > ret void undef >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prim_mode = 0 >TESS_CTRL >PROPERTY TCS_VERTICES_OUT 3 >DCL IN[][0], GENERIC[0] >DCL IN[][1], GENERIC[1] >DCL IN[][2], GENERIC[2] >DCL IN[][3], GENERIC[3] >DCL IN[][4], GENERIC[4] >DCL IN[][5], GENERIC[5] >DCL SV[0], INVOCATIONID >DCL OUT[0], TESSOUTER >DCL OUT[1], TESSINNER >DCL OUT[][2..7], ARRAY(1), GENERIC[0] >DCL OUT[8], PATCH >DCL OUT[9], PATCH[1] >DCL OUT[10], PATCH[2] >DCL OUT[11], PATCH[3] >DCL CONST[1][0..22] >DCL CONST[2][0..4] >DCL CONST[3][0..23] >DCL TEMP[0..16], LOCAL >DCL ADDR[0..1] >IMM[0] FLT32 { 1.0000, 100.0000, 0.0000, -0.5000} >IMM[1] UINT32 {2, 0, 16, 32} >IMM[2] UINT32 {48, 1, 176, 112} >IMM[3] INT32 {1, 3, 0, 0} >IMM[4] UINT32 {128, 144, 64, 80} >IMM[5] FLT32 { 0.5000, 158456325028528675187087900672.0000, 63.0000, 0.0000} >IMM[6] UINT32 {96, 368, 352, 0} > 0: UARL ADDR[1].x, SV[0].xxxx > 1: UARL ADDR[1].x, SV[0].xxxx > 2: MOV TEMP[0], IN[ADDR[1].x][0] > 3: UARL ADDR[1].x, SV[0].xxxx > 4: MOV OUT[ADDR[1].x](1)[2], TEMP[0] > 5: UARL ADDR[1].x, SV[0].xxxx 
> 6: UARL ADDR[1].x, SV[0].xxxx > 7: MOV TEMP[0], IN[ADDR[1].x][1] > 8: UARL ADDR[1].x, SV[0].xxxx > 9: MOV OUT[ADDR[1].x](1)[3], TEMP[0] > 10: UARL ADDR[1].x, SV[0].xxxx > 11: UARL ADDR[1].x, SV[0].xxxx > 12: MOV TEMP[0], IN[ADDR[1].x][2] > 13: UARL ADDR[1].x, SV[0].xxxx > 14: MOV OUT[ADDR[1].x](1)[4], TEMP[0] > 15: UARL ADDR[1].x, SV[0].xxxx > 16: UARL ADDR[1].x, SV[0].xxxx > 17: MOV TEMP[0], IN[ADDR[1].x][3] > 18: UARL ADDR[1].x, SV[0].xxxx > 19: MOV OUT[ADDR[1].x](1)[5], TEMP[0] > 20: UARL ADDR[1].x, SV[0].xxxx > 21: UARL ADDR[1].x, SV[0].xxxx > 22: MOV TEMP[0], IN[ADDR[1].x][4] > 23: UARL ADDR[1].x, SV[0].xxxx > 24: MOV OUT[ADDR[1].x](1)[6], TEMP[0] > 25: UARL ADDR[1].x, SV[0].xxxx > 26: UARL ADDR[1].x, SV[0].xxxx > 27: MOV TEMP[0], IN[ADDR[1].x][5] > 28: UARL ADDR[1].x, SV[0].xxxx > 29: MOV OUT[ADDR[1].x](1)[7], TEMP[0] > 30: MOV TEMP[0].xyz, IN[0][0].xyzx > 31: MOV TEMP[0].w, IMM[0].xxxx > 32: MOV TEMP[1], CONST[3][0] > 33: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 34: MOV TEMP[2], CONST[3][1] > 35: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 36: MOV TEMP[1].y, TEMP[2].xxxx > 37: MOV TEMP[2], CONST[3][2] > 38: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 39: MOV TEMP[3], CONST[3][3] > 40: DP4 TEMP[3].x, TEMP[3], TEMP[0] > 41: MOV TEMP[4].xyz, IN[1][0].xyzx > 42: MOV TEMP[4].w, IMM[0].xxxx > 43: MOV TEMP[5], CONST[3][0] > 44: DP4 TEMP[5].x, TEMP[5], TEMP[4] > 45: MOV TEMP[6], CONST[3][1] > 46: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 47: MOV TEMP[5].y, TEMP[6].xxxx > 48: MOV TEMP[6], CONST[3][2] > 49: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 50: MOV TEMP[0].y, TEMP[6].xxxx > 51: MOV TEMP[6], CONST[3][3] > 52: DP4 TEMP[6].x, TEMP[6], TEMP[4] > 53: MOV TEMP[4].xyz, IN[2][0].xyzx > 54: MOV TEMP[4].w, IMM[0].xxxx > 55: MOV TEMP[7], CONST[3][0] > 56: DP4 TEMP[7].x, TEMP[7], TEMP[4] > 57: MOV TEMP[8], CONST[3][1] > 58: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 59: MOV TEMP[7].y, TEMP[8].xxxx > 60: MOV TEMP[8], CONST[3][2] > 61: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 62: MOV TEMP[0].w, TEMP[8].xxxx > 63: MOV TEMP[8], CONST[3][3] > 64: DP4 TEMP[8].x, TEMP[8], TEMP[4] > 65: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 66: MOV TEMP[1].z, TEMP[2].xxxx > 67: ADD TEMP[2].xy, TEMP[0].ywww, IMM[0].yyyy > 68: MOV TEMP[0].yw, TEMP[2].yxyy > 69: ABS TEMP[2].x, TEMP[3].xxxx > 70: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 71: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[1].xyyy > 72: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 73: INEG TEMP[9].xy, TEMP[9].xyyy > 74: MOV TEMP[4].yz, TEMP[9].yxyy > 75: FSLT TEMP[9].xy, TEMP[1].xyyy, IMM[0].zzzz > 76: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 77: INEG TEMP[9].xy, TEMP[9].xyyy > 78: MOV TEMP[5].zw, TEMP[9].yyxy > 79: INEG TEMP[9].xy, TEMP[4].yzzz > 80: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 81: MOV TEMP[4].yz, TEMP[9].yxyy > 82: I2F TEMP[9].xy, TEMP[4].yzzz > 83: MOV TEMP[4].yz, TEMP[9].yxyy > 84: FMA TEMP[1].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[1].xyyy > 85: ABS TEMP[2].x, TEMP[6].xxxx > 86: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy > 87: FSLT TEMP[9].xy, IMM[0].zzzz, TEMP[5].xyyy > 88: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 89: INEG TEMP[9].xy, TEMP[9].xyyy > 90: MOV TEMP[4].yz, TEMP[9].yxyy > 91: FSLT TEMP[9].xy, TEMP[5].xyyy, IMM[0].zzzz > 92: AND TEMP[9].xy, TEMP[9].xyyy, IMM[3].xxxx > 93: INEG TEMP[9].xy, TEMP[9].xyyy > 94: MOV TEMP[5].zw, TEMP[9].yyxy > 95: INEG TEMP[9].xy, TEMP[4].yzzz > 96: UADD TEMP[9].xy, TEMP[9].xyyy, TEMP[5].zwww > 97: MOV TEMP[4].yz, TEMP[9].yxyy > 98: I2F TEMP[9].xy, TEMP[4].yzzz > 99: MOV TEMP[4].yz, TEMP[9].yxyy >100: FMA TEMP[4].xy, -TEMP[2].xxxx, TEMP[4].yzzz, TEMP[5].xyyy 
>101: FSLT TEMP[2].xy, IMM[0].zzzz, TEMP[7].xyyy >102: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >103: INEG TEMP[2].xy, TEMP[2].xyyy >104: MOV TEMP[5].xy, TEMP[2].xyxx >105: FSLT TEMP[2].xy, TEMP[7].xyyy, IMM[0].zzzz >106: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >107: INEG TEMP[2].xy, TEMP[2].xyyy >108: MOV TEMP[5].zw, TEMP[2].yyxy >109: INEG TEMP[2].xy, TEMP[5].xyyy >110: UADD TEMP[2].xy, TEMP[2].xyyy, TEMP[5].zwww >111: MOV TEMP[5].xy, TEMP[2].xyxx >112: I2F TEMP[5].xy, TEMP[5].xyyy >113: ABS TEMP[2].x, TEMP[8].xxxx >114: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy >115: FMA TEMP[2].xy, -TEMP[2].xxxx, TEMP[5].xyyy, TEMP[7].xyyy >116: MOV TEMP[4].zw, TEMP[2].yyxy >117: FSLT TEMP[2].xy, TEMP[1].xyyy, -TEMP[3].xxxx >118: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >119: INEG TEMP[2].xy, TEMP[2].xyyy >120: MOV TEMP[5].xy, TEMP[2].xyxx >121: FSLT TEMP[2].xy, TEMP[4].xyyy, -TEMP[6].xxxx >122: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >123: INEG TEMP[2].xy, TEMP[2].xyyy >124: MOV TEMP[5].zw, TEMP[2].yyxy >125: AND TEMP[2], TEMP[5], IMM[2].yyyy >126: MOV TEMP[2], TEMP[2] >127: UADD TEMP[2].xy, TEMP[2].zwww, TEMP[2].xyyy >128: MOV TEMP[5].xy, TEMP[2].xyxx >129: FSLT TEMP[2].xy, TEMP[4].zwww, -TEMP[8].xxxx >130: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >131: INEG TEMP[2].xy, TEMP[2].xyyy >132: MOV TEMP[5].zw, TEMP[2].yyxy >133: AND TEMP[2].xy, TEMP[5].zwww, IMM[2].yyyy >134: MOV TEMP[5].zw, TEMP[2].yyxy >135: UADD TEMP[2].xy, TEMP[5].zwww, TEMP[5].xyyy >136: MOV TEMP[5].xy, TEMP[2].xyxx >137: FSLT TEMP[2].x, TEMP[1].zzzz, IMM[0].zzzz >138: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >139: INEG TEMP[2].x, TEMP[2].xxxx >140: MOV TEMP[1].z, TEMP[2].xxxx >141: AND TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >142: MOV TEMP[1].z, TEMP[2].xxxx >143: FSLT TEMP[2].xy, TEMP[0].ywww, IMM[0].zzzz >144: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >145: INEG TEMP[2].xy, TEMP[2].xyyy >146: MOV TEMP[0].yw, TEMP[2].yxyy >147: AND TEMP[2].xy, TEMP[0].ywww, IMM[2].yyyy >148: MOV TEMP[0].yw, TEMP[2].yxyy >149: UADD TEMP[2].x, TEMP[0].yyyy, TEMP[1].zzzz >150: MOV TEMP[0].y, TEMP[2].xxxx >151: UADD TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >152: MOV TEMP[0].y, TEMP[2].xxxx >153: FSLT TEMP[2].xy, TEMP[3].xxxx, TEMP[1].xyyy >154: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >155: INEG TEMP[2].xy, TEMP[2].xyyy >156: MOV TEMP[0].xw, TEMP[2].xxxy >157: AND TEMP[2].xy, TEMP[0].xwww, IMM[2].yyyy >158: MOV TEMP[0].xw, TEMP[2].xxxy >159: FSLT TEMP[2].xy, TEMP[6].xxxx, TEMP[4].xyyy >160: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >161: INEG TEMP[2].xy, TEMP[2].xyyy >162: MOV TEMP[1].xy, TEMP[2].xyxx >163: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >164: MOV TEMP[1].xy, TEMP[2].xyxx >165: UADD TEMP[2].xy, TEMP[0].xwww, TEMP[1].xyyy >166: MOV TEMP[0].xz, TEMP[2].xxyx >167: FSLT TEMP[2].xy, TEMP[8].xxxx, TEMP[4].zwww >168: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >169: INEG TEMP[2].xy, TEMP[2].xyyy >170: MOV TEMP[1].xy, TEMP[2].xyxx >171: AND TEMP[2].xy, TEMP[1].xyyy, IMM[2].yyyy >172: MOV TEMP[1].xy, TEMP[2].xyxx >173: UADD TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >174: MOV TEMP[0].xz, TEMP[2].xxyx >175: USEQ TEMP[2].xy, TEMP[5].xyyy, IMM[3].yyyy >176: AND TEMP[2].xy, TEMP[2].xyyy, IMM[3].xxxx >177: INEG TEMP[2].xy, TEMP[2].xyyy >178: MOV TEMP[1].xy, TEMP[2].xyxx >179: USEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[3].yyyy >180: AND TEMP[2].xyz, TEMP[2].xyzz, IMM[3].xxxx >181: INEG TEMP[2].xyz, TEMP[2].xyzz >182: MOV TEMP[0].xyz, TEMP[2].xyzx >183: OR TEMP[2].xy, TEMP[0].xzzz, TEMP[1].xyyy >184: MOV TEMP[0].xz, TEMP[2].xxyx >185: OR TEMP[2].x, TEMP[0].zzzz, 
TEMP[0].xxxx >186: MOV TEMP[0].x, TEMP[2].xxxx >187: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >188: MOV TEMP[0].x, TEMP[2].xxxx >189: ADD TEMP[2].xyz, -IN[0][0].zxyy, IN[1][0].zxyy >190: MOV TEMP[0].yzw, TEMP[2].yxyz >191: ADD TEMP[1].xyz, -IN[0][0].xyzz, IN[2][0].xyzz >192: MUL TEMP[4].xyz, TEMP[0].yzww, TEMP[1].yzxx >193: FMA TEMP[2].xyz, TEMP[0].wyzz, TEMP[1].zxyy, -TEMP[4].xyzz >194: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz >195: MOV TEMP[1].w, TEMP[3].xxxx >196: RSQ TEMP[3].x, TEMP[1].wwww >197: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx >198: MOV TEMP[0].yzw, TEMP[2].yxyz >199: MOV TEMP[2].xyz, CONST[1][11] >200: MOV TEMP[4].xyz, TEMP[2].xyzx >201: MOV TEMP[4].w, IMM[0].xxxx >202: MOV TEMP[2], CONST[1][7] >203: DP4 TEMP[5].x, TEMP[2], TEMP[4] >204: MOV TEMP[2], CONST[1][8] >205: DP4 TEMP[2].x, TEMP[2], TEMP[4] >206: MOV TEMP[5].y, TEMP[2].xxxx >207: MOV TEMP[2], CONST[1][9] >208: DP4 TEMP[2].x, TEMP[2], TEMP[4] >209: MOV TEMP[5].z, TEMP[2].xxxx >210: ADD TEMP[4].xyz, TEMP[5].xyzz, -IN[0][0].xyzz >211: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz >212: MOV TEMP[1].w, TEMP[2].xxxx >213: RSQ TEMP[2].x, TEMP[1].wwww >214: MOV TEMP[1].w, TEMP[2].xxxx >215: MUL TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz >216: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[4].xyzz >217: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww >218: AND TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx >219: INEG TEMP[2].x, TEMP[2].xxxx >220: MOV TEMP[0].y, TEMP[2].xxxx >221: OR TEMP[2].x, TEMP[0].yyyy, TEMP[0].xxxx >222: MOV TEMP[0].x, TEMP[2].xxxx >223: MOV TEMP[2].x, TEMP[0].xxxx >224: USEQ TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy >225: UIF TEMP[2].xxxx :0 >226: ADD TEMP[0].xyz, -IN[1][0].xyzz, IN[0][0].xyzz >227: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz >228: ADD TEMP[2].xyz, -IN[2][0].xyzz, IN[1][0].xyzz >229: MOV TEMP[0].yzw, TEMP[2].yxyz >230: DP3 TEMP[2].x, TEMP[0].yzww, TEMP[0].yzww >231: MOV TEMP[0].y, TEMP[2].xxxx >232: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >233: MOV TEMP[0].z, TEMP[2].xxxx >234: SQRT TEMP[2].x, TEMP[0].xxxx >235: SQRT TEMP[2].y, TEMP[0].yyyy >236: SQRT TEMP[2].z, TEMP[0].zzzz >237: MOV TEMP[0].xyz, TEMP[2].xyzx >238: ADD TEMP[1].xyz, IN[1][0].xyzz, IN[0][0].xyzz >239: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].xxxx >240: ADD TEMP[4].xyz, IN[2][0].xyzz, IN[1][0].xyzz >241: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[5].xxxx >242: ADD TEMP[5].xyz, IN[0][0].xyzz, IN[2][0].xyzz >243: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[5].xxxx >244: MOV TEMP[2].y, CONST[3][4] >245: MOV TEMP[7].x, TEMP[2].yyyy >246: MOV TEMP[2].y, CONST[3][5] >247: MOV TEMP[7].y, TEMP[2].yyyy >248: MOV TEMP[2].y, CONST[3][6] >249: MOV TEMP[7].z, TEMP[2].yyyy >250: FMA TEMP[2].xyz, TEMP[7].xyzz, TEMP[0].xxxx, TEMP[1].xyzz >251: FMA TEMP[3].xyz, TEMP[7].xyzz, TEMP[0].yyyy, TEMP[4].xyzz >252: FMA TEMP[0].xyz, TEMP[7].xyzz, TEMP[0].zzzz, TEMP[5].xyzz >253: MOV TEMP[1].w, IMM[0].xxxx >254: MOV TEMP[6], CONST[3][0] >255: DP4 TEMP[7].x, TEMP[6], TEMP[1] >256: MOV TEMP[6], CONST[3][1] >257: DP4 TEMP[6].x, TEMP[6], TEMP[1] >258: MOV TEMP[7].y, TEMP[6].xxxx >259: MOV TEMP[6], CONST[3][3] >260: DP4 TEMP[6].x, TEMP[6], TEMP[1] >261: MOV TEMP[4].w, IMM[0].xxxx >262: MOV TEMP[8], CONST[3][0] >263: DP4 TEMP[8].x, TEMP[8], TEMP[4] >264: MOV TEMP[9], CONST[3][1] >265: DP4 TEMP[9].x, TEMP[9], TEMP[4] >266: MOV TEMP[8].y, TEMP[9].xxxx >267: MOV TEMP[9], CONST[3][3] >268: DP4 TEMP[9].x, TEMP[9], TEMP[4] >269: MOV TEMP[5].w, IMM[0].xxxx >270: MOV TEMP[10], CONST[3][0] >271: DP4 TEMP[4].x, TEMP[10], TEMP[5] >272: MOV TEMP[10], CONST[3][1] >273: DP4 TEMP[10].x, TEMP[10], TEMP[5] >274: MOV 
TEMP[4].y, TEMP[10].xxxx >275: MOV TEMP[10], CONST[3][3] >276: DP4 TEMP[10].x, TEMP[10], TEMP[5] >277: MOV TEMP[2].w, IMM[0].xxxx >278: MOV TEMP[11], CONST[3][0] >279: DP4 TEMP[5].x, TEMP[11], TEMP[2] >280: MOV TEMP[11], CONST[3][1] >281: DP4 TEMP[11].x, TEMP[11], TEMP[2] >282: MOV TEMP[5].y, TEMP[11].xxxx >283: MOV TEMP[11], CONST[3][3] >284: DP4 TEMP[11].x, TEMP[11], TEMP[2] >285: MOV TEMP[3].w, IMM[0].xxxx >286: MOV TEMP[12], CONST[3][0] >287: DP4 TEMP[2].x, TEMP[12], TEMP[3] >288: MOV TEMP[12], CONST[3][1] >289: DP4 TEMP[12].x, TEMP[12], TEMP[3] >290: MOV TEMP[2].y, TEMP[12].xxxx >291: MOV TEMP[12], CONST[3][3] >292: DP4 TEMP[12].x, TEMP[12], TEMP[3] >293: MOV TEMP[0].w, IMM[0].xxxx >294: MOV TEMP[13], CONST[3][0] >295: DP4 TEMP[3].x, TEMP[13], TEMP[0] >296: MOV TEMP[13], CONST[3][1] >297: DP4 TEMP[13].x, TEMP[13], TEMP[0] >298: MOV TEMP[3].y, TEMP[13].xxxx >299: MOV TEMP[13], CONST[3][3] >300: DP4 TEMP[13].x, TEMP[13], TEMP[0] >301: FSEQ TEMP[14].xy, TEMP[9].xxxx, IMM[0].zzzz >302: SSG TEMP[15].xy, TEMP[8].xyyy >303: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >304: RCP TEMP[16].xy, TEMP[9].xxxx >305: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[16].xyyy >306: UCMP TEMP[8].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[8].xyyy >307: FSEQ TEMP[14].xy, TEMP[10].xxxx, IMM[0].zzzz >308: SSG TEMP[15].xy, TEMP[4].xyyy >309: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >310: RCP TEMP[16].xy, TEMP[10].xxxx >311: MUL TEMP[16].xy, TEMP[4].xyyy, TEMP[16].xyyy >312: UCMP TEMP[14].xy, TEMP[14].xyyy, TEMP[15].xyyy, TEMP[16].xyyy >313: FSEQ TEMP[15].xy, TEMP[11].xxxx, IMM[0].zzzz >314: SSG TEMP[16].xy, TEMP[5].xyyy >315: MUL TEMP[16].xy, IMM[5].yyyy, TEMP[16].xyyy >316: RCP TEMP[11].xy, TEMP[11].xxxx >317: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[11].xyyy >318: UCMP TEMP[5].xy, TEMP[15].xyyy, TEMP[16].xyyy, TEMP[5].xyyy >319: FSEQ TEMP[11].xy, TEMP[6].xxxx, IMM[0].zzzz >320: SSG TEMP[15].xy, TEMP[7].xyyy >321: MUL TEMP[15].xy, IMM[5].yyyy, TEMP[15].xyyy >322: RCP TEMP[16].xy, TEMP[6].xxxx >323: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[16].xyyy >324: UCMP TEMP[7].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[7].xyyy >325: ADD TEMP[5].xy, TEMP[7].xyyy, -TEMP[5].xyyy >326: MOV TEMP[0].yz, TEMP[5].yxyy >327: FSEQ TEMP[5].xy, TEMP[12].xxxx, IMM[0].zzzz >328: SSG TEMP[7].xy, TEMP[2].xyyy >329: MUL TEMP[7].xy, IMM[5].yyyy, TEMP[7].xyyy >330: RCP TEMP[11].xy, TEMP[12].xxxx >331: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[11].xyyy >332: UCMP TEMP[2].xy, TEMP[5].xyyy, TEMP[7].xyyy, TEMP[2].xyyy >333: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[8].xyyy >334: MOV TEMP[4].zw, TEMP[2].yyxy >335: MOV TEMP[2].xy, CONST[3][23] >336: MUL TEMP[2].xy, TEMP[4].zwww, TEMP[2].xyyy >337: MOV TEMP[4].zw, TEMP[2].yyxy >338: FSEQ TEMP[2].xy, TEMP[13].xxxx, IMM[0].zzzz >339: SSG TEMP[5].xy, TEMP[3].xyyy >340: MUL TEMP[5].xy, IMM[5].yyyy, TEMP[5].xyyy >341: RCP TEMP[7].xy, TEMP[13].xxxx >342: MUL TEMP[3].xy, TEMP[3].xyyy, TEMP[7].xyyy >343: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >344: ADD TEMP[2].xy, -TEMP[2].xyyy, TEMP[14].xyyy >345: MOV TEMP[0].xw, TEMP[2].xxxy >346: MOV TEMP[2].xy, CONST[3][23] >347: MUL TEMP[0], TEMP[0], TEMP[2].xxyy >348: DP2 TEMP[2].x, TEMP[0].yzzz, TEMP[0].yzzz >349: MOV TEMP[0].y, TEMP[2].xxxx >350: DP2 TEMP[2].x, TEMP[4].zwww, TEMP[4].zwww >351: MOV TEMP[0].z, TEMP[2].xxxx >352: DP2 TEMP[0].x, TEMP[0].xwww, TEMP[0].xwww >353: SQRT TEMP[2].x, TEMP[0].xxxx >354: SQRT TEMP[2].y, TEMP[0].yyyy >355: SQRT TEMP[2].z, TEMP[0].zzzz >356: MOV TEMP[2].xyz, TEMP[2].xyzx >357: MOV TEMP[3].z, CONST[1][22] >358: ADD TEMP[3].x, TEMP[6].xxxx, 
-TEMP[3].zzzz >359: MOV TEMP[0].w, TEMP[3].xxxx >360: MOV TEMP[3].z, CONST[1][22] >361: ADD TEMP[1].x, TEMP[9].xxxx, -TEMP[3].zzzz >362: MOV TEMP[3].z, CONST[1][22] >363: ADD TEMP[3].x, TEMP[10].xxxx, -TEMP[3].zzzz >364: MOV TEMP[1].y, TEMP[3].xxxx >365: MOV TEMP[3].w, CONST[1][22] >366: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >367: UIF TEMP[3].xxxx :0 >368: MOV TEMP[3].w, CONST[1][22] >369: RCP TEMP[3].x, TEMP[3].wwww >370: MUL TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx >371: ELSE :0 >372: SSG TEMP[5].x, TEMP[0].wwww >373: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >374: ENDIF >375: MOV_SAT TEMP[3].x, TEMP[3].xxxx >376: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >377: MOV TEMP[0].w, TEMP[3].xxxx >378: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].yyyy >379: MOV TEMP[0].y, TEMP[3].xxxx >380: MOV TEMP[3].w, CONST[1][22] >381: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >382: UIF TEMP[3].xxxx :0 >383: MOV TEMP[3].w, CONST[1][22] >384: RCP TEMP[3].x, TEMP[3].wwww >385: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx >386: ELSE :0 >387: SSG TEMP[5].x, TEMP[1].xxxx >388: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >389: ENDIF >390: MOV_SAT TEMP[3].x, TEMP[3].xxxx >391: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >392: MOV TEMP[0].w, TEMP[3].xxxx >393: MUL TEMP[3].x, TEMP[0].wwww, TEMP[2].zzzz >394: MOV TEMP[0].z, TEMP[3].xxxx >395: MOV TEMP[3].w, CONST[1][22] >396: FSNE TEMP[3].x, TEMP[3].wwww, IMM[0].zzzz >397: UIF TEMP[3].xxxx :0 >398: MOV TEMP[3].w, CONST[1][22] >399: RCP TEMP[3].x, TEMP[3].wwww >400: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx >401: ELSE :0 >402: SSG TEMP[5].x, TEMP[1].yyyy >403: MUL TEMP[3].x, IMM[5].yyyy, TEMP[5].xxxx >404: ENDIF >405: MOV_SAT TEMP[3].x, TEMP[3].xxxx >406: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx >407: MOV TEMP[0].w, TEMP[3].xxxx >408: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx >409: MOV TEMP[2].xy, CONST[1][22] >410: MOV TEMP[3].xy, CONST[2][4] >411: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[3].xyyy >412: MAX TEMP[2].x, TEMP[1].yyyy, IMM[0].xxxx >413: MOV TEMP[0].w, TEMP[2].xxxx >414: FSEQ TEMP[2].xy, TEMP[1].xxxx, IMM[0].zzzz >415: SSG TEMP[3].xy, TEMP[0].xyyy >416: MUL TEMP[3].xy, IMM[5].yyyy, TEMP[3].xyyy >417: RCP TEMP[5].xy, TEMP[1].xxxx >418: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[5].xyyy >419: UCMP TEMP[0].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >420: MAX TEMP[2].x, TEMP[0].yyyy, IMM[0].xxxx >421: MOV TEMP[0].y, TEMP[2].xxxx >422: MIN TEMP[2].x, TEMP[0].wwww, TEMP[0].yyyy >423: MOV TEMP[4].z, TEMP[2].xxxx >424: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz >425: UIF TEMP[2].xxxx :0 >426: RCP TEMP[1].x, TEMP[1].xxxx >427: MUL TEMP[1].x, TEMP[0].zzzz, TEMP[1].xxxx >428: ELSE :0 >429: SSG TEMP[2].x, TEMP[0].zzzz >430: MUL TEMP[1].x, IMM[5].yyyy, TEMP[2].xxxx >431: ENDIF >432: MOV TEMP[0].y, TEMP[1].xxxx >433: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].xxxx >434: MIN TEMP[4].xy, TEMP[0].wwww, TEMP[0].yxxx >435: MAX TEMP[0].x, TEMP[4].zzzz, TEMP[4].yyyy >436: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx >437: MOV TEMP[4].w, TEMP[0].xxxx >438: ELSE :0 >439: MOV TEMP[4], IMM[0].zzzz >440: ENDIF >441: MIN TEMP[0], TEMP[4], IMM[5].zzzz >442: MOV TEMP[1].x, TEMP[0].xxxx >443: MOV TEMP[2].x, TEMP[0].yyyy >444: MOV TEMP[3].x, TEMP[0].zzzz >445: MOV TEMP[0].x, TEMP[0].wwww >446: MOV OUT[8], TEMP[1] >447: MOV OUT[9], TEMP[2] >448: MOV OUT[10], TEMP[3] >449: MOV OUT[11], TEMP[0] >450: MOV OUT[0].x, TEMP[1].xxxx >451: MOV OUT[0].y, TEMP[2].xxxx >452: MOV OUT[0].z, TEMP[3].xxxx >453: MOV OUT[1].x, TEMP[0].xxxx >454: END >radeonsi: Compiling shader 333 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename 
= "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 > %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 64) > %35 = call float @llvm.SI.load.const(<16 x i8> %33, i32 68) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) > %39 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) > %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) > %41 = call float @llvm.SI.load.const(<16 x i8> %37, i32 12) > %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 16) > %43 = call float @llvm.SI.load.const(<16 x i8> %37, i32 20) > %44 = call float @llvm.SI.load.const(<16 x i8> %37, i32 24) > %45 = call float @llvm.SI.load.const(<16 x i8> %37, i32 28) > %46 = call float @llvm.SI.load.const(<16 x i8> %37, i32 32) > %47 = call float @llvm.SI.load.const(<16 x i8> %37, i32 36) > %48 = call float @llvm.SI.load.const(<16 x i8> %37, i32 40) > %49 = call float @llvm.SI.load.const(<16 x i8> %37, i32 44) > %50 = call float @llvm.SI.load.const(<16 x i8> %37, i32 48) > %51 = call float @llvm.SI.load.const(<16 x i8> %37, i32 52) > %52 = call float @llvm.SI.load.const(<16 x i8> %37, i32 56) > %53 = call float @llvm.SI.load.const(<16 x i8> %37, i32 60) > %54 = call float @llvm.SI.load.const(<16 x i8> %37, i32 68) > %55 = call float @llvm.SI.load.const(<16 x i8> %37, i32 84) > %56 = call float @llvm.SI.load.const(<16 x i8> %37, i32 100) > %57 = call float @llvm.SI.load.const(<16 x i8> %37, i32 368) > %58 = call float 
@llvm.SI.load.const(<16 x i8> %37, i32 372) > %59 = lshr i32 %10, 8 > %60 = and i32 %59, 31 > %61 = lshr i32 %7, 13 > %62 = and i32 %61, 255 > %63 = and i32 %7, 8191 > %64 = and i32 %10, 255 > %65 = mul nuw nsw i32 %63, %64 > %66 = mul nuw nsw i32 %60, %62 > %67 = add nuw nsw i32 %65, %66 > %68 = add nuw nsw i32 %67, 16 > %69 = zext i32 %68 to i64 > %70 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %69 > %71 = load i32, i32 addrspace(3)* %70, align 4 > %72 = lshr i32 %7, 13 > %73 = and i32 %72, 255 > %74 = and i32 %7, 8191 > %75 = and i32 %10, 255 > %76 = mul nuw nsw i32 %74, %75 > %77 = mul nuw nsw i32 %60, %73 > %78 = add nuw nsw i32 %76, %77 > %79 = add nuw nsw i32 %78, 17 > %80 = zext i32 %79 to i64 > %81 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %80 > %82 = load i32, i32 addrspace(3)* %81, align 4 > %83 = lshr i32 %7, 13 > %84 = and i32 %83, 255 > %85 = and i32 %7, 8191 > %86 = and i32 %10, 255 > %87 = mul nuw nsw i32 %85, %86 > %88 = mul nuw nsw i32 %60, %84 > %89 = add nuw nsw i32 %87, %88 > %90 = add nuw nsw i32 %89, 18 > %91 = zext i32 %90 to i64 > %92 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %91 > %93 = load i32, i32 addrspace(3)* %92, align 4 > %94 = lshr i32 %7, 13 > %95 = and i32 %94, 255 > %96 = and i32 %7, 8191 > %97 = and i32 %10, 255 > %98 = mul nuw nsw i32 %96, %97 > %99 = mul nuw nsw i32 %60, %95 > %100 = add nuw nsw i32 %98, %99 > %101 = add nuw nsw i32 %100, 19 > %102 = zext i32 %101 to i64 > %103 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %102 > %104 = load i32, i32 addrspace(3)* %103, align 4 > %105 = lshr i32 %6, 13 > %106 = and i32 %105, 255 > %107 = shl i32 %5, 2 > %108 = and i32 %107, 262140 > %109 = and i32 %6, 8191 > %110 = and i32 %10, 255 > %111 = mul nuw nsw i32 %109, %110 > %112 = add nuw nsw i32 %108, %111 > %113 = mul nuw nsw i32 %60, %106 > %114 = add nuw nsw i32 %112, %113 > %115 = add nuw nsw i32 %114, 16 > %116 = zext i32 %115 to i64 > %117 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %116 > store i32 %71, i32 addrspace(3)* %117, align 4 > %118 = add nuw nsw i32 %114, 17 > %119 = zext i32 %118 to i64 > %120 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %119 > store i32 %82, i32 addrspace(3)* %120, align 4 > %121 = add nuw nsw i32 %114, 18 > %122 = zext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > store i32 %93, i32 addrspace(3)* %123, align 4 > %124 = add nuw nsw i32 %114, 19 > %125 = zext i32 %124 to i64 > %126 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %125 > store i32 %104, i32 addrspace(3)* %126, align 4 > %127 = lshr i32 %7, 13 > %128 = and i32 %127, 255 > %129 = and i32 %7, 8191 > %130 = and i32 %10, 255 > %131 = mul nuw nsw i32 %129, %130 > %132 = mul nuw nsw i32 %60, %128 > %133 = add nuw nsw i32 %131, %132 > %134 = add nuw nsw i32 %133, 20 > %135 = zext i32 %134 to i64 > %136 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %135 > %137 = load i32, i32 addrspace(3)* %136, align 4 > %138 = lshr i32 %7, 13 > %139 = and i32 %138, 255 > %140 = and i32 %7, 8191 > %141 = and i32 %10, 255 > %142 = mul nuw nsw i32 %140, %141 > %143 = mul nuw nsw i32 %60, %139 > %144 = add nuw nsw i32 %142, %143 > %145 = add nuw nsw i32 %144, 21 > %146 = zext i32 %145 to i64 > %147 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %146 > %148 = load i32, i32 addrspace(3)* %147, align 4 > %149 = lshr i32 %7, 13 > %150 = and i32 %149, 255 > %151 = and i32 %7, 8191 > %152 = and i32 %10, 255 > %153 = mul nuw nsw i32 %151, %152 > %154 = mul nuw nsw i32 %60, %150 > %155 = add nuw nsw i32 %153, %154 > %156 = add nuw nsw i32 %155, 22 > %157 = zext i32 %156 to i64 > %158 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %157 > %159 = load i32, i32 addrspace(3)* %158, align 4 > %160 = lshr i32 %7, 13 > %161 = and i32 %160, 255 > %162 = and i32 %7, 8191 > %163 = and i32 %10, 255 > %164 = mul nuw nsw i32 %162, %163 > %165 = mul nuw nsw i32 %60, %161 > %166 = add nuw nsw i32 %164, %165 > %167 = add nuw nsw i32 %166, 23 > %168 = zext i32 %167 to i64 > %169 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %168 > %170 = load i32, i32 addrspace(3)* %169, align 4 > %171 = lshr i32 %6, 13 > %172 = and i32 %171, 255 > %173 = shl i32 %5, 2 > %174 = and i32 %173, 262140 > %175 = and i32 %6, 8191 > %176 = and i32 %10, 255 > %177 = mul nuw nsw i32 %175, %176 > %178 = add nuw nsw i32 %174, %177 > %179 = mul nuw nsw i32 %60, %172 > %180 = add nuw nsw i32 %178, %179 > %181 = add nuw nsw i32 %180, 20 > %182 = zext i32 %181 to i64 > %183 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %182 > store i32 %137, i32 addrspace(3)* %183, align 4 > %184 = add nuw nsw i32 %180, 21 > %185 = zext i32 %184 to i64 > %186 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %185 > store i32 %148, i32 addrspace(3)* %186, align 4 > %187 = add nuw nsw i32 %180, 22 > %188 = zext i32 %187 to i64 > %189 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %188 > store i32 %159, i32 addrspace(3)* %189, align 4 > %190 = add nuw nsw i32 %180, 23 > %191 = zext i32 %190 to i64 > %192 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %191 > store i32 %170, i32 addrspace(3)* %192, align 4 > %193 = lshr i32 %7, 13 > %194 = and i32 %193, 255 > %195 = and i32 %7, 8191 > %196 = and i32 %10, 255 > %197 = mul nuw nsw i32 %195, %196 > %198 = mul nuw nsw i32 %60, %194 > %199 = add nuw nsw i32 %197, %198 > %200 = add nuw nsw i32 %199, 24 > %201 = zext i32 %200 to i64 > %202 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %201 > %203 = load i32, i32 addrspace(3)* %202, align 4 > %204 = lshr i32 %7, 13 > %205 = and i32 %204, 255 > %206 = and i32 %7, 8191 > %207 = and i32 %10, 255 > %208 = mul nuw nsw i32 %206, %207 > %209 = mul nuw nsw i32 %60, %205 > %210 = add nuw nsw i32 %208, %209 > %211 = add nuw nsw i32 %210, 25 > %212 = zext i32 %211 to i64 > %213 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %212 > %214 = load i32, i32 addrspace(3)* %213, align 4 > %215 = lshr i32 %7, 13 > %216 = and i32 %215, 255 > %217 = and i32 %7, 8191 > %218 = and i32 %10, 255 > %219 = mul nuw nsw i32 %217, %218 > %220 = mul nuw nsw i32 %60, %216 > %221 = add nuw nsw i32 %219, %220 > %222 = add nuw nsw i32 %221, 26 > %223 = zext i32 %222 to i64 > %224 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %223 > %225 = load i32, i32 addrspace(3)* %224, align 4 > %226 = lshr i32 %7, 13 > %227 = and i32 %226, 255 > %228 = and i32 %7, 8191 > %229 = and i32 %10, 255 > %230 = mul nuw nsw i32 %228, %229 > %231 = mul nuw nsw i32 %60, %227 > %232 = add nuw nsw i32 %230, %231 > %233 = add nuw nsw i32 %232, 27 > %234 = 
zext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = load i32, i32 addrspace(3)* %235, align 4 > %237 = lshr i32 %6, 13 > %238 = and i32 %237, 255 > %239 = shl i32 %5, 2 > %240 = and i32 %239, 262140 > %241 = and i32 %6, 8191 > %242 = and i32 %10, 255 > %243 = mul nuw nsw i32 %241, %242 > %244 = add nuw nsw i32 %240, %243 > %245 = mul nuw nsw i32 %60, %238 > %246 = add nuw nsw i32 %244, %245 > %247 = add nuw nsw i32 %246, 24 > %248 = zext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > store i32 %203, i32 addrspace(3)* %249, align 4 > %250 = add nuw nsw i32 %246, 25 > %251 = zext i32 %250 to i64 > %252 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %251 > store i32 %214, i32 addrspace(3)* %252, align 4 > %253 = add nuw nsw i32 %246, 26 > %254 = zext i32 %253 to i64 > %255 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %254 > store i32 %225, i32 addrspace(3)* %255, align 4 > %256 = add nuw nsw i32 %246, 27 > %257 = zext i32 %256 to i64 > %258 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %257 > store i32 %236, i32 addrspace(3)* %258, align 4 > %259 = lshr i32 %7, 13 > %260 = and i32 %259, 255 > %261 = and i32 %7, 8191 > %262 = and i32 %10, 255 > %263 = mul nuw nsw i32 %261, %262 > %264 = mul nuw nsw i32 %60, %260 > %265 = add nuw nsw i32 %263, %264 > %266 = add nuw nsw i32 %265, 28 > %267 = zext i32 %266 to i64 > %268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %267 > %269 = load i32, i32 addrspace(3)* %268, align 4 > %270 = lshr i32 %7, 13 > %271 = and i32 %270, 255 > %272 = and i32 %7, 8191 > %273 = and i32 %10, 255 > %274 = mul nuw nsw i32 %272, %273 > %275 = mul nuw nsw i32 %60, %271 > %276 = add nuw nsw i32 %274, %275 > %277 = add nuw nsw i32 %276, 29 > %278 = zext i32 %277 to i64 > %279 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %278 > %280 = load i32, i32 addrspace(3)* %279, align 4 > %281 = lshr i32 %7, 13 > %282 = and i32 %281, 255 > %283 = and i32 %7, 8191 > %284 = and i32 %10, 255 > %285 = mul nuw nsw i32 %283, %284 > %286 = mul nuw nsw i32 %60, %282 > %287 = add nuw nsw i32 %285, %286 > %288 = add nuw nsw i32 %287, 30 > %289 = zext i32 %288 to i64 > %290 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %289 > %291 = load i32, i32 addrspace(3)* %290, align 4 > %292 = lshr i32 %7, 13 > %293 = and i32 %292, 255 > %294 = and i32 %7, 8191 > %295 = and i32 %10, 255 > %296 = mul nuw nsw i32 %294, %295 > %297 = mul nuw nsw i32 %60, %293 > %298 = add nuw nsw i32 %296, %297 > %299 = add nuw nsw i32 %298, 31 > %300 = zext i32 %299 to i64 > %301 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %300 > %302 = load i32, i32 addrspace(3)* %301, align 4 > %303 = lshr i32 %6, 13 > %304 = and i32 %303, 255 > %305 = shl i32 %5, 2 > %306 = and i32 %305, 262140 > %307 = and i32 %6, 8191 > %308 = and i32 %10, 255 > %309 = mul nuw nsw i32 %307, %308 > %310 = add nuw nsw i32 %306, %309 > %311 = mul nuw nsw i32 %60, %304 > %312 = add nuw nsw i32 %310, %311 > %313 = add nuw nsw i32 %312, 28 > %314 = zext i32 %313 to i64 > %315 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %314 > store i32 %269, i32 addrspace(3)* %315, align 4 > %316 = add nuw nsw i32 %312, 29 > %317 = zext i32 %316 to i64 > %318 = getelementptr [8320 x 
i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %317 > store i32 %280, i32 addrspace(3)* %318, align 4 > %319 = add nuw nsw i32 %312, 30 > %320 = zext i32 %319 to i64 > %321 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %320 > store i32 %291, i32 addrspace(3)* %321, align 4 > %322 = add nuw nsw i32 %312, 31 > %323 = zext i32 %322 to i64 > %324 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %323 > store i32 %302, i32 addrspace(3)* %324, align 4 > %325 = lshr i32 %7, 13 > %326 = and i32 %325, 255 > %327 = and i32 %7, 8191 > %328 = and i32 %10, 255 > %329 = mul nuw nsw i32 %327, %328 > %330 = mul nuw nsw i32 %60, %326 > %331 = add nuw nsw i32 %329, %330 > %332 = add nuw nsw i32 %331, 32 > %333 = zext i32 %332 to i64 > %334 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %333 > %335 = load i32, i32 addrspace(3)* %334, align 4 > %336 = lshr i32 %7, 13 > %337 = and i32 %336, 255 > %338 = and i32 %7, 8191 > %339 = and i32 %10, 255 > %340 = mul nuw nsw i32 %338, %339 > %341 = mul nuw nsw i32 %60, %337 > %342 = add nuw nsw i32 %340, %341 > %343 = add nuw nsw i32 %342, 33 > %344 = zext i32 %343 to i64 > %345 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %344 > %346 = load i32, i32 addrspace(3)* %345, align 4 > %347 = lshr i32 %7, 13 > %348 = and i32 %347, 255 > %349 = and i32 %7, 8191 > %350 = and i32 %10, 255 > %351 = mul nuw nsw i32 %349, %350 > %352 = mul nuw nsw i32 %60, %348 > %353 = add nuw nsw i32 %351, %352 > %354 = add nuw nsw i32 %353, 34 > %355 = zext i32 %354 to i64 > %356 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %355 > %357 = load i32, i32 addrspace(3)* %356, align 4 > %358 = lshr i32 %7, 13 > %359 = and i32 %358, 255 > %360 = and i32 %7, 8191 > %361 = and i32 %10, 255 > %362 = mul nuw nsw i32 %360, %361 > %363 = mul nuw nsw i32 %60, %359 > %364 = add nuw nsw i32 %362, %363 > %365 = add nuw nsw i32 %364, 35 > %366 = zext i32 %365 to i64 > %367 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %366 > %368 = load i32, i32 addrspace(3)* %367, align 4 > %369 = lshr i32 %6, 13 > %370 = and i32 %369, 255 > %371 = shl i32 %5, 2 > %372 = and i32 %371, 262140 > %373 = and i32 %6, 8191 > %374 = and i32 %10, 255 > %375 = mul nuw nsw i32 %373, %374 > %376 = add nuw nsw i32 %372, %375 > %377 = mul nuw nsw i32 %60, %370 > %378 = add nuw nsw i32 %376, %377 > %379 = add nuw nsw i32 %378, 32 > %380 = zext i32 %379 to i64 > %381 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %380 > store i32 %335, i32 addrspace(3)* %381, align 4 > %382 = add nuw nsw i32 %378, 33 > %383 = zext i32 %382 to i64 > %384 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %383 > store i32 %346, i32 addrspace(3)* %384, align 4 > %385 = add nuw nsw i32 %378, 34 > %386 = zext i32 %385 to i64 > %387 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %386 > store i32 %357, i32 addrspace(3)* %387, align 4 > %388 = add nuw nsw i32 %378, 35 > %389 = zext i32 %388 to i64 > %390 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %389 > store i32 %368, i32 addrspace(3)* %390, align 4 > %391 = lshr i32 %7, 13 > %392 = and i32 %391, 255 > %393 = and i32 %7, 8191 > %394 = and i32 %10, 255 > %395 = mul nuw nsw i32 %393, %394 > %396 = mul nuw nsw i32 %60, %392 > %397 = add nuw nsw i32 %395, %396 > %398 = add nuw nsw i32 %397, 36 > 
%399 = zext i32 %398 to i64 > %400 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %399 > %401 = load i32, i32 addrspace(3)* %400, align 4 > %402 = lshr i32 %7, 13 > %403 = and i32 %402, 255 > %404 = and i32 %7, 8191 > %405 = and i32 %10, 255 > %406 = mul nuw nsw i32 %404, %405 > %407 = mul nuw nsw i32 %60, %403 > %408 = add nuw nsw i32 %406, %407 > %409 = add nuw nsw i32 %408, 37 > %410 = zext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = load i32, i32 addrspace(3)* %411, align 4 > %413 = lshr i32 %7, 13 > %414 = and i32 %413, 255 > %415 = and i32 %7, 8191 > %416 = and i32 %10, 255 > %417 = mul nuw nsw i32 %415, %416 > %418 = mul nuw nsw i32 %60, %414 > %419 = add nuw nsw i32 %417, %418 > %420 = add nuw nsw i32 %419, 38 > %421 = zext i32 %420 to i64 > %422 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %421 > %423 = load i32, i32 addrspace(3)* %422, align 4 > %424 = lshr i32 %7, 13 > %425 = and i32 %424, 255 > %426 = and i32 %7, 8191 > %427 = and i32 %10, 255 > %428 = mul nuw nsw i32 %426, %427 > %429 = mul nuw nsw i32 %60, %425 > %430 = add nuw nsw i32 %428, %429 > %431 = add nuw nsw i32 %430, 39 > %432 = zext i32 %431 to i64 > %433 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %432 > %434 = load i32, i32 addrspace(3)* %433, align 4 > %435 = lshr i32 %6, 13 > %436 = and i32 %435, 255 > %437 = shl i32 %5, 2 > %438 = and i32 %437, 262140 > %439 = and i32 %6, 8191 > %440 = and i32 %10, 255 > %441 = mul nuw nsw i32 %439, %440 > %442 = add nuw nsw i32 %438, %441 > %443 = mul nuw nsw i32 %60, %436 > %444 = add nuw nsw i32 %442, %443 > %445 = add nuw nsw i32 %444, 36 > %446 = zext i32 %445 to i64 > %447 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %446 > store i32 %401, i32 addrspace(3)* %447, align 4 > %448 = add nuw nsw i32 %444, 37 > %449 = zext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > store i32 %412, i32 addrspace(3)* %450, align 4 > %451 = add nuw nsw i32 %444, 38 > %452 = zext i32 %451 to i64 > %453 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %452 > store i32 %423, i32 addrspace(3)* %453, align 4 > %454 = add nuw nsw i32 %444, 39 > %455 = zext i32 %454 to i64 > %456 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %455 > store i32 %434, i32 addrspace(3)* %456, align 4 > %457 = and i32 %7, 8191 > %458 = and i32 %10, 255 > %459 = mul nuw nsw i32 %457, %458 > %460 = add nuw nsw i32 %459, 16 > %461 = zext i32 %460 to i64 > %462 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %461 > %463 = bitcast i32 addrspace(3)* %462 to float addrspace(3)* > %464 = load float, float addrspace(3)* %463, align 4 > %465 = and i32 %7, 8191 > %466 = and i32 %10, 255 > %467 = mul nuw nsw i32 %465, %466 > %468 = add nuw nsw i32 %467, 17 > %469 = zext i32 %468 to i64 > %470 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %469 > %471 = bitcast i32 addrspace(3)* %470 to float addrspace(3)* > %472 = load float, float addrspace(3)* %471, align 4 > %473 = and i32 %7, 8191 > %474 = and i32 %10, 255 > %475 = mul nuw nsw i32 %473, %474 > %476 = add nuw nsw i32 %475, 18 > %477 = zext i32 %476 to i64 > %478 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %477 > %479 = bitcast i32 addrspace(3)* %478 to float 
addrspace(3)* > %480 = load float, float addrspace(3)* %479, align 4 > %481 = fmul float %38, %464 > %482 = fmul float %39, %472 > %483 = fadd float %481, %482 > %484 = fmul float %40, %480 > %485 = fadd float %483, %484 > %486 = fadd float %485, %41 > %487 = fmul float %42, %464 > %488 = fmul float %43, %472 > %489 = fadd float %487, %488 > %490 = fmul float %44, %480 > %491 = fadd float %489, %490 > %492 = fadd float %491, %45 > %493 = fmul float %46, %464 > %494 = fmul float %47, %472 > %495 = fadd float %493, %494 > %496 = fmul float %48, %480 > %497 = fadd float %495, %496 > %498 = fadd float %497, %49 > %499 = fmul float %50, %464 > %500 = fmul float %51, %472 > %501 = fadd float %499, %500 > %502 = fmul float %52, %480 > %503 = fadd float %501, %502 > %504 = fadd float %503, %53 > %505 = lshr i32 %7, 13 > %506 = and i32 %505, 255 > %507 = and i32 %7, 8191 > %508 = and i32 %10, 255 > %509 = mul nuw nsw i32 %507, %508 > %510 = add nuw nsw i32 %509, %506 > %511 = add nuw nsw i32 %510, 16 > %512 = zext i32 %511 to i64 > %513 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %512 > %514 = bitcast i32 addrspace(3)* %513 to float addrspace(3)* > %515 = load float, float addrspace(3)* %514, align 4 > %516 = lshr i32 %7, 13 > %517 = and i32 %516, 255 > %518 = and i32 %7, 8191 > %519 = and i32 %10, 255 > %520 = mul nuw nsw i32 %518, %519 > %521 = add nuw nsw i32 %520, %517 > %522 = add nuw nsw i32 %521, 17 > %523 = zext i32 %522 to i64 > %524 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %523 > %525 = bitcast i32 addrspace(3)* %524 to float addrspace(3)* > %526 = load float, float addrspace(3)* %525, align 4 > %527 = lshr i32 %7, 13 > %528 = and i32 %527, 255 > %529 = and i32 %7, 8191 > %530 = and i32 %10, 255 > %531 = mul nuw nsw i32 %529, %530 > %532 = add nuw nsw i32 %531, %528 > %533 = add nuw nsw i32 %532, 18 > %534 = zext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %38, %515 > %539 = fmul float %39, %526 > %540 = fadd float %538, %539 > %541 = fmul float %40, %537 > %542 = fadd float %540, %541 > %543 = fadd float %542, %41 > %544 = fmul float %42, %515 > %545 = fmul float %43, %526 > %546 = fadd float %544, %545 > %547 = fmul float %44, %537 > %548 = fadd float %546, %547 > %549 = fadd float %548, %45 > %550 = fmul float %46, %515 > %551 = fmul float %47, %526 > %552 = fadd float %550, %551 > %553 = fmul float %48, %537 > %554 = fadd float %552, %553 > %555 = fadd float %554, %49 > %556 = fmul float %50, %515 > %557 = fmul float %51, %526 > %558 = fadd float %556, %557 > %559 = fmul float %52, %537 > %560 = fadd float %558, %559 > %561 = fadd float %560, %53 > %562 = and i32 %7, 8191 > %563 = and i32 %10, 255 > %564 = mul nuw nsw i32 %562, %563 > %565 = lshr i32 %7, 12 > %566 = and i32 %565, 510 > %567 = add nuw nsw i32 %564, %566 > %568 = add nuw nsw i32 %567, 16 > %569 = zext i32 %568 to i64 > %570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %569 > %571 = bitcast i32 addrspace(3)* %570 to float addrspace(3)* > %572 = load float, float addrspace(3)* %571, align 4 > %573 = and i32 %7, 8191 > %574 = and i32 %10, 255 > %575 = mul nuw nsw i32 %573, %574 > %576 = lshr i32 %7, 12 > %577 = and i32 %576, 510 > %578 = add nuw nsw i32 %575, %577 > %579 = add nuw nsw i32 %578, 17 > %580 = zext i32 %579 
to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = and i32 %7, 8191 > %585 = and i32 %10, 255 > %586 = mul nuw nsw i32 %584, %585 > %587 = lshr i32 %7, 12 > %588 = and i32 %587, 510 > %589 = add nuw nsw i32 %586, %588 > %590 = add nuw nsw i32 %589, 18 > %591 = zext i32 %590 to i64 > %592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %591 > %593 = bitcast i32 addrspace(3)* %592 to float addrspace(3)* > %594 = load float, float addrspace(3)* %593, align 4 > %595 = fmul float %38, %572 > %596 = fmul float %39, %583 > %597 = fadd float %595, %596 > %598 = fmul float %40, %594 > %599 = fadd float %597, %598 > %600 = fadd float %599, %41 > %601 = fmul float %42, %572 > %602 = fmul float %43, %583 > %603 = fadd float %601, %602 > %604 = fmul float %44, %594 > %605 = fadd float %603, %604 > %606 = fadd float %605, %45 > %607 = fmul float %46, %572 > %608 = fmul float %47, %583 > %609 = fadd float %607, %608 > %610 = fmul float %48, %594 > %611 = fadd float %609, %610 > %612 = fadd float %611, %49 > %613 = fmul float %50, %572 > %614 = fmul float %51, %583 > %615 = fadd float %613, %614 > %616 = fmul float %52, %594 > %617 = fadd float %615, %616 > %618 = fadd float %617, %53 > %619 = fadd float %498, 1.000000e+02 > %620 = fadd float %555, 1.000000e+02 > %621 = fadd float %612, 1.000000e+02 > %622 = call float @llvm.fabs.f32(float %504) > %623 = call float @llvm.minnum.f32(float %622, float 1.000000e+02) > %624 = fcmp ogt float %486, 0.000000e+00 > %625 = fcmp ogt float %492, 0.000000e+00 > %626 = fcmp olt float %486, 0.000000e+00 > %627 = fcmp olt float %492, 0.000000e+00 > %628 = sext i1 %626 to i32 > %629 = sext i1 %627 to i32 > %630 = zext i1 %624 to i32 > %631 = zext i1 %625 to i32 > %632 = add nsw i32 %630, %628 > %633 = add nsw i32 %631, %629 > %634 = sitofp i32 %632 to float > %635 = sitofp i32 %633 to float > %636 = fsub float -0.000000e+00, %623 > %637 = call float @llvm.fma.f32(float %636, float %634, float %486) > %638 = fsub float -0.000000e+00, %623 > %639 = call float @llvm.fma.f32(float %638, float %635, float %492) > %640 = call float @llvm.fabs.f32(float %561) > %641 = call float @llvm.minnum.f32(float %640, float 1.000000e+02) > %642 = fcmp ogt float %543, 0.000000e+00 > %643 = fcmp ogt float %549, 0.000000e+00 > %644 = fcmp olt float %543, 0.000000e+00 > %645 = fcmp olt float %549, 0.000000e+00 > %646 = sext i1 %644 to i32 > %647 = sext i1 %645 to i32 > %648 = zext i1 %642 to i32 > %649 = zext i1 %643 to i32 > %650 = add nsw i32 %648, %646 > %651 = add nsw i32 %649, %647 > %652 = sitofp i32 %650 to float > %653 = sitofp i32 %651 to float > %654 = fsub float -0.000000e+00, %641 > %655 = call float @llvm.fma.f32(float %654, float %652, float %543) > %656 = fsub float -0.000000e+00, %641 > %657 = call float @llvm.fma.f32(float %656, float %653, float %549) > %658 = fcmp ogt float %600, 0.000000e+00 > %659 = fcmp ogt float %606, 0.000000e+00 > %660 = fcmp olt float %600, 0.000000e+00 > %661 = fcmp olt float %606, 0.000000e+00 > %662 = sext i1 %660 to i32 > %663 = sext i1 %661 to i32 > %664 = zext i1 %658 to i32 > %665 = zext i1 %659 to i32 > %666 = add nsw i32 %664, %662 > %667 = add nsw i32 %665, %663 > %668 = sitofp i32 %666 to float > %669 = sitofp i32 %667 to float > %670 = call float @llvm.fabs.f32(float %618) > %671 = call float @llvm.minnum.f32(float %670, float 
1.000000e+02) > %672 = fsub float -0.000000e+00, %671 > %673 = call float @llvm.fma.f32(float %672, float %668, float %600) > %674 = fsub float -0.000000e+00, %671 > %675 = call float @llvm.fma.f32(float %674, float %669, float %606) > %676 = fsub float -0.000000e+00, %504 > %677 = fcmp olt float %637, %676 > %678 = fsub float -0.000000e+00, %504 > %679 = fcmp olt float %639, %678 > %680 = zext i1 %677 to i32 > %681 = zext i1 %679 to i32 > %682 = fsub float -0.000000e+00, %561 > %683 = fcmp olt float %655, %682 > %684 = fsub float -0.000000e+00, %561 > %685 = fcmp olt float %657, %684 > %686 = zext i1 %683 to i32 > %687 = zext i1 %685 to i32 > %688 = add nuw nsw i32 %686, %680 > %689 = add nuw nsw i32 %687, %681 > %690 = fsub float -0.000000e+00, %618 > %691 = fcmp olt float %673, %690 > %692 = fsub float -0.000000e+00, %618 > %693 = fcmp olt float %675, %692 > %694 = zext i1 %691 to i32 > %695 = zext i1 %693 to i32 > %696 = add nuw nsw i32 %694, %688 > %697 = add nuw nsw i32 %695, %689 > %698 = fcmp olt float %619, 0.000000e+00 > %699 = zext i1 %698 to i32 > %700 = fcmp olt float %620, 0.000000e+00 > %701 = fcmp olt float %621, 0.000000e+00 > %702 = zext i1 %700 to i32 > %703 = zext i1 %701 to i32 > %704 = add nuw nsw i32 %702, %699 > %705 = add nuw nsw i32 %703, %704 > %706 = fcmp olt float %504, %637 > %707 = fcmp olt float %504, %639 > %708 = zext i1 %706 to i32 > %709 = zext i1 %707 to i32 > %710 = fcmp olt float %561, %655 > %711 = fcmp olt float %561, %657 > %712 = zext i1 %710 to i32 > %713 = zext i1 %711 to i32 > %714 = add nuw nsw i32 %708, %712 > %715 = add nuw nsw i32 %709, %713 > %716 = fcmp olt float %618, %673 > %717 = fcmp olt float %618, %675 > %718 = zext i1 %716 to i32 > %719 = zext i1 %717 to i32 > %720 = add nuw nsw i32 %714, %718 > %721 = add nuw nsw i32 %715, %719 > %722 = icmp eq i32 %696, 3 > %723 = icmp eq i32 %697, 3 > %724 = sext i1 %722 to i32 > %725 = sext i1 %723 to i32 > %726 = icmp eq i32 %720, 3 > %727 = icmp eq i32 %721, 3 > %728 = select i1 %727, i32 -1, i32 %725 > %729 = select i1 %726, i32 -1, i32 %724 > %730 = or i32 %728, %729 > %731 = and i32 %7, 8191 > %732 = and i32 %10, 255 > %733 = mul nuw nsw i32 %731, %732 > %734 = add nuw nsw i32 %733, 18 > %735 = zext i32 %734 to i64 > %736 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %735 > %737 = bitcast i32 addrspace(3)* %736 to float addrspace(3)* > %738 = load float, float addrspace(3)* %737, align 4 > %739 = lshr i32 %7, 13 > %740 = and i32 %739, 255 > %741 = and i32 %7, 8191 > %742 = and i32 %10, 255 > %743 = mul nuw nsw i32 %741, %742 > %744 = add nuw nsw i32 %743, %740 > %745 = add nuw nsw i32 %744, 18 > %746 = zext i32 %745 to i64 > %747 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %746 > %748 = bitcast i32 addrspace(3)* %747 to float addrspace(3)* > %749 = load float, float addrspace(3)* %748, align 4 > %750 = fsub float %749, %738 > %751 = and i32 %7, 8191 > %752 = and i32 %10, 255 > %753 = mul nuw nsw i32 %751, %752 > %754 = add nuw nsw i32 %753, 16 > %755 = zext i32 %754 to i64 > %756 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %755 > %757 = bitcast i32 addrspace(3)* %756 to float addrspace(3)* > %758 = load float, float addrspace(3)* %757, align 4 > %759 = lshr i32 %7, 13 > %760 = and i32 %759, 255 > %761 = and i32 %7, 8191 > %762 = and i32 %10, 255 > %763 = mul nuw nsw i32 %761, %762 > %764 = add nuw nsw i32 %763, %760 > %765 = add nuw nsw i32 %764, 16 > %766 = zext i32 %765 to i64 > %767 = 
getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %766 > %768 = bitcast i32 addrspace(3)* %767 to float addrspace(3)* > %769 = load float, float addrspace(3)* %768, align 4 > %770 = fsub float %769, %758 > %771 = and i32 %7, 8191 > %772 = and i32 %10, 255 > %773 = mul nuw nsw i32 %771, %772 > %774 = add nuw nsw i32 %773, 17 > %775 = zext i32 %774 to i64 > %776 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %775 > %777 = bitcast i32 addrspace(3)* %776 to float addrspace(3)* > %778 = load float, float addrspace(3)* %777, align 4 > %779 = lshr i32 %7, 13 > %780 = and i32 %779, 255 > %781 = and i32 %7, 8191 > %782 = and i32 %10, 255 > %783 = mul nuw nsw i32 %781, %782 > %784 = add nuw nsw i32 %783, %780 > %785 = add nuw nsw i32 %784, 17 > %786 = zext i32 %785 to i64 > %787 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %786 > %788 = bitcast i32 addrspace(3)* %787 to float addrspace(3)* > %789 = load float, float addrspace(3)* %788, align 4 > %790 = fsub float %789, %778 > %791 = and i32 %7, 8191 > %792 = and i32 %10, 255 > %793 = mul nuw nsw i32 %791, %792 > %794 = add nuw nsw i32 %793, 16 > %795 = zext i32 %794 to i64 > %796 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %795 > %797 = bitcast i32 addrspace(3)* %796 to float addrspace(3)* > %798 = load float, float addrspace(3)* %797, align 4 > %799 = and i32 %7, 8191 > %800 = and i32 %10, 255 > %801 = mul nuw nsw i32 %799, %800 > %802 = lshr i32 %7, 12 > %803 = and i32 %802, 510 > %804 = add nuw nsw i32 %801, %803 > %805 = add nuw nsw i32 %804, 16 > %806 = zext i32 %805 to i64 > %807 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %806 > %808 = bitcast i32 addrspace(3)* %807 to float addrspace(3)* > %809 = load float, float addrspace(3)* %808, align 4 > %810 = fsub float %809, %798 > %811 = and i32 %7, 8191 > %812 = and i32 %10, 255 > %813 = mul nuw nsw i32 %811, %812 > %814 = add nuw nsw i32 %813, 17 > %815 = zext i32 %814 to i64 > %816 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %815 > %817 = bitcast i32 addrspace(3)* %816 to float addrspace(3)* > %818 = load float, float addrspace(3)* %817, align 4 > %819 = and i32 %7, 8191 > %820 = and i32 %10, 255 > %821 = mul nuw nsw i32 %819, %820 > %822 = lshr i32 %7, 12 > %823 = and i32 %822, 510 > %824 = add nuw nsw i32 %821, %823 > %825 = add nuw nsw i32 %824, 17 > %826 = zext i32 %825 to i64 > %827 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %826 > %828 = bitcast i32 addrspace(3)* %827 to float addrspace(3)* > %829 = load float, float addrspace(3)* %828, align 4 > %830 = fsub float %829, %818 > %831 = and i32 %7, 8191 > %832 = and i32 %10, 255 > %833 = mul nuw nsw i32 %831, %832 > %834 = add nuw nsw i32 %833, 18 > %835 = zext i32 %834 to i64 > %836 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %835 > %837 = bitcast i32 addrspace(3)* %836 to float addrspace(3)* > %838 = load float, float addrspace(3)* %837, align 4 > %839 = and i32 %7, 8191 > %840 = and i32 %10, 255 > %841 = mul nuw nsw i32 %839, %840 > %842 = lshr i32 %7, 12 > %843 = and i32 %842, 510 > %844 = add nuw nsw i32 %841, %843 > %845 = add nuw nsw i32 %844, 18 > %846 = zext i32 %845 to i64 > %847 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %846 > %848 = bitcast i32 addrspace(3)* %847 to float addrspace(3)* > %849 = load float, float addrspace(3)* 
%848, align 4 > %850 = fsub float %849, %838 > %851 = fmul float %750, %830 > %852 = fmul float %770, %850 > %853 = fmul float %790, %810 > %854 = fsub float -0.000000e+00, %851 > %855 = call float @llvm.fma.f32(float %790, float %850, float %854) > %856 = fsub float -0.000000e+00, %852 > %857 = call float @llvm.fma.f32(float %750, float %810, float %856) > %858 = fsub float -0.000000e+00, %853 > %859 = call float @llvm.fma.f32(float %770, float %830, float %858) > %860 = fmul float %855, %855 > %861 = fmul float %857, %857 > %862 = fadd float %861, %860 > %863 = fmul float %859, %859 > %864 = fadd float %862, %863 > %865 = call float @llvm.AMDGPU.rsq.clamped.f32(float %864) > %866 = fmul float %855, %865 > %867 = fmul float %857, %865 > %868 = fmul float %859, %865 > %869 = fmul float %13, %25 > %870 = fmul float %14, %26 > %871 = fadd float %869, %870 > %872 = fmul float %15, %27 > %873 = fadd float %871, %872 > %874 = fadd float %873, %16 > %875 = fmul float %17, %25 > %876 = fmul float %18, %26 > %877 = fadd float %875, %876 > %878 = fmul float %19, %27 > %879 = fadd float %877, %878 > %880 = fadd float %879, %20 > %881 = fmul float %21, %25 > %882 = fmul float %22, %26 > %883 = fadd float %881, %882 > %884 = fmul float %23, %27 > %885 = fadd float %883, %884 > %886 = fadd float %885, %24 > %887 = and i32 %7, 8191 > %888 = and i32 %10, 255 > %889 = mul nuw nsw i32 %887, %888 > %890 = add nuw nsw i32 %889, 16 > %891 = zext i32 %890 to i64 > %892 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %891 > %893 = bitcast i32 addrspace(3)* %892 to float addrspace(3)* > %894 = load float, float addrspace(3)* %893, align 4 > %895 = fsub float %874, %894 > %896 = and i32 %7, 8191 > %897 = and i32 %10, 255 > %898 = mul nuw nsw i32 %896, %897 > %899 = add nuw nsw i32 %898, 17 > %900 = zext i32 %899 to i64 > %901 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %900 > %902 = bitcast i32 addrspace(3)* %901 to float addrspace(3)* > %903 = load float, float addrspace(3)* %902, align 4 > %904 = fsub float %880, %903 > %905 = and i32 %7, 8191 > %906 = and i32 %10, 255 > %907 = mul nuw nsw i32 %905, %906 > %908 = add nuw nsw i32 %907, 18 > %909 = zext i32 %908 to i64 > %910 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %909 > %911 = bitcast i32 addrspace(3)* %910 to float addrspace(3)* > %912 = load float, float addrspace(3)* %911, align 4 > %913 = fsub float %886, %912 > %914 = fmul float %895, %895 > %915 = fmul float %904, %904 > %916 = fadd float %915, %914 > %917 = fmul float %913, %913 > %918 = fadd float %916, %917 > %919 = call float @llvm.AMDGPU.rsq.clamped.f32(float %918) > %920 = fmul float %919, %895 > %921 = fmul float %919, %904 > %922 = fmul float %919, %913 > %923 = fmul float %866, %920 > %924 = fmul float %867, %921 > %925 = fadd float %924, %923 > %926 = fmul float %868, %922 > %927 = fadd float %925, %926 > %928 = icmp eq i32 %730, 0 > %notlhs = fcmp uge float %927, -5.000000e-01 > %notrhs = icmp ne i32 %705, 3 > %not. = and i1 %notrhs, %notlhs > %929 = and i1 %928, %not. 
> br i1 %929, label %IF, label %ENDIF > >IF: ; preds = %main_body > %930 = lshr i32 %7, 13 > %931 = and i32 %930, 255 > %932 = and i32 %7, 8191 > %933 = and i32 %10, 255 > %934 = mul nuw nsw i32 %932, %933 > %935 = add nuw nsw i32 %934, %931 > %936 = add nuw nsw i32 %935, 16 > %937 = zext i32 %936 to i64 > %938 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %937 > %939 = bitcast i32 addrspace(3)* %938 to float addrspace(3)* > %940 = load float, float addrspace(3)* %939, align 4 > %941 = and i32 %7, 8191 > %942 = and i32 %10, 255 > %943 = mul nuw nsw i32 %941, %942 > %944 = add nuw nsw i32 %943, 16 > %945 = zext i32 %944 to i64 > %946 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %945 > %947 = bitcast i32 addrspace(3)* %946 to float addrspace(3)* > %948 = load float, float addrspace(3)* %947, align 4 > %949 = fsub float %948, %940 > %950 = lshr i32 %7, 13 > %951 = and i32 %950, 255 > %952 = and i32 %7, 8191 > %953 = and i32 %10, 255 > %954 = mul nuw nsw i32 %952, %953 > %955 = add nuw nsw i32 %954, %951 > %956 = add nuw nsw i32 %955, 17 > %957 = zext i32 %956 to i64 > %958 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %957 > %959 = bitcast i32 addrspace(3)* %958 to float addrspace(3)* > %960 = load float, float addrspace(3)* %959, align 4 > %961 = and i32 %7, 8191 > %962 = and i32 %10, 255 > %963 = mul nuw nsw i32 %961, %962 > %964 = add nuw nsw i32 %963, 17 > %965 = zext i32 %964 to i64 > %966 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %965 > %967 = bitcast i32 addrspace(3)* %966 to float addrspace(3)* > %968 = load float, float addrspace(3)* %967, align 4 > %969 = fsub float %968, %960 > %970 = lshr i32 %7, 13 > %971 = and i32 %970, 255 > %972 = and i32 %7, 8191 > %973 = and i32 %10, 255 > %974 = mul nuw nsw i32 %972, %973 > %975 = add nuw nsw i32 %974, %971 > %976 = add nuw nsw i32 %975, 18 > %977 = zext i32 %976 to i64 > %978 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %977 > %979 = bitcast i32 addrspace(3)* %978 to float addrspace(3)* > %980 = load float, float addrspace(3)* %979, align 4 > %981 = and i32 %7, 8191 > %982 = and i32 %10, 255 > %983 = mul nuw nsw i32 %981, %982 > %984 = add nuw nsw i32 %983, 18 > %985 = zext i32 %984 to i64 > %986 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %985 > %987 = bitcast i32 addrspace(3)* %986 to float addrspace(3)* > %988 = load float, float addrspace(3)* %987, align 4 > %989 = fsub float %988, %980 > %990 = fmul float %949, %949 > %991 = fmul float %969, %969 > %992 = fadd float %991, %990 > %993 = fmul float %989, %989 > %994 = fadd float %992, %993 > %995 = and i32 %7, 8191 > %996 = and i32 %10, 255 > %997 = mul nuw nsw i32 %995, %996 > %998 = lshr i32 %7, 12 > %999 = and i32 %998, 510 > %1000 = add nuw nsw i32 %997, %999 > %1001 = add nuw nsw i32 %1000, 16 > %1002 = zext i32 %1001 to i64 > %1003 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1002 > %1004 = bitcast i32 addrspace(3)* %1003 to float addrspace(3)* > %1005 = load float, float addrspace(3)* %1004, align 4 > %1006 = lshr i32 %7, 13 > %1007 = and i32 %1006, 255 > %1008 = and i32 %7, 8191 > %1009 = and i32 %10, 255 > %1010 = mul nuw nsw i32 %1008, %1009 > %1011 = add nuw nsw i32 %1010, %1007 > %1012 = add nuw nsw i32 %1011, 16 > %1013 = zext i32 %1012 to i64 > %1014 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1013 > 
%1015 = bitcast i32 addrspace(3)* %1014 to float addrspace(3)* > %1016 = load float, float addrspace(3)* %1015, align 4 > %1017 = fsub float %1016, %1005 > %1018 = and i32 %7, 8191 > %1019 = and i32 %10, 255 > %1020 = mul nuw nsw i32 %1018, %1019 > %1021 = lshr i32 %7, 12 > %1022 = and i32 %1021, 510 > %1023 = add nuw nsw i32 %1020, %1022 > %1024 = add nuw nsw i32 %1023, 17 > %1025 = zext i32 %1024 to i64 > %1026 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1025 > %1027 = bitcast i32 addrspace(3)* %1026 to float addrspace(3)* > %1028 = load float, float addrspace(3)* %1027, align 4 > %1029 = lshr i32 %7, 13 > %1030 = and i32 %1029, 255 > %1031 = and i32 %7, 8191 > %1032 = and i32 %10, 255 > %1033 = mul nuw nsw i32 %1031, %1032 > %1034 = add nuw nsw i32 %1033, %1030 > %1035 = add nuw nsw i32 %1034, 17 > %1036 = zext i32 %1035 to i64 > %1037 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1036 > %1038 = bitcast i32 addrspace(3)* %1037 to float addrspace(3)* > %1039 = load float, float addrspace(3)* %1038, align 4 > %1040 = fsub float %1039, %1028 > %1041 = and i32 %7, 8191 > %1042 = and i32 %10, 255 > %1043 = mul nuw nsw i32 %1041, %1042 > %1044 = lshr i32 %7, 12 > %1045 = and i32 %1044, 510 > %1046 = add nuw nsw i32 %1043, %1045 > %1047 = add nuw nsw i32 %1046, 18 > %1048 = zext i32 %1047 to i64 > %1049 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1048 > %1050 = bitcast i32 addrspace(3)* %1049 to float addrspace(3)* > %1051 = load float, float addrspace(3)* %1050, align 4 > %1052 = lshr i32 %7, 13 > %1053 = and i32 %1052, 255 > %1054 = and i32 %7, 8191 > %1055 = and i32 %10, 255 > %1056 = mul nuw nsw i32 %1054, %1055 > %1057 = add nuw nsw i32 %1056, %1053 > %1058 = add nuw nsw i32 %1057, 18 > %1059 = zext i32 %1058 to i64 > %1060 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1059 > %1061 = bitcast i32 addrspace(3)* %1060 to float addrspace(3)* > %1062 = load float, float addrspace(3)* %1061, align 4 > %1063 = fsub float %1062, %1051 > %1064 = fmul float %1017, %1017 > %1065 = fmul float %1040, %1040 > %1066 = fadd float %1065, %1064 > %1067 = fmul float %1063, %1063 > %1068 = fadd float %1066, %1067 > %1069 = fmul float %810, %810 > %1070 = fmul float %830, %830 > %1071 = fadd float %1070, %1069 > %1072 = fmul float %850, %850 > %1073 = fadd float %1071, %1072 > %1074 = call float @llvm.sqrt.f32(float %994) > %1075 = call float @llvm.sqrt.f32(float %1068) > %1076 = call float @llvm.sqrt.f32(float %1073) > %1077 = lshr i32 %7, 13 > %1078 = and i32 %1077, 255 > %1079 = and i32 %7, 8191 > %1080 = and i32 %10, 255 > %1081 = mul nuw nsw i32 %1079, %1080 > %1082 = add nuw nsw i32 %1081, %1078 > %1083 = add nuw nsw i32 %1082, 16 > %1084 = zext i32 %1083 to i64 > %1085 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1084 > %1086 = bitcast i32 addrspace(3)* %1085 to float addrspace(3)* > %1087 = load float, float addrspace(3)* %1086, align 4 > %1088 = and i32 %7, 8191 > %1089 = and i32 %10, 255 > %1090 = mul nuw nsw i32 %1088, %1089 > %1091 = add nuw nsw i32 %1090, 16 > %1092 = zext i32 %1091 to i64 > %1093 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1092 > %1094 = bitcast i32 addrspace(3)* %1093 to float addrspace(3)* > %1095 = load float, float addrspace(3)* %1094, align 4 > %1096 = fadd float %1087, %1095 > %1097 = lshr i32 %7, 13 > %1098 = and i32 %1097, 255 > %1099 = and i32 %7, 8191 > %1100 = 
and i32 %10, 255 > %1101 = mul nuw nsw i32 %1099, %1100 > %1102 = add nuw nsw i32 %1101, %1098 > %1103 = add nuw nsw i32 %1102, 17 > %1104 = zext i32 %1103 to i64 > %1105 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1104 > %1106 = bitcast i32 addrspace(3)* %1105 to float addrspace(3)* > %1107 = load float, float addrspace(3)* %1106, align 4 > %1108 = and i32 %7, 8191 > %1109 = and i32 %10, 255 > %1110 = mul nuw nsw i32 %1108, %1109 > %1111 = add nuw nsw i32 %1110, 17 > %1112 = zext i32 %1111 to i64 > %1113 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1112 > %1114 = bitcast i32 addrspace(3)* %1113 to float addrspace(3)* > %1115 = load float, float addrspace(3)* %1114, align 4 > %1116 = fadd float %1107, %1115 > %1117 = lshr i32 %7, 13 > %1118 = and i32 %1117, 255 > %1119 = and i32 %7, 8191 > %1120 = and i32 %10, 255 > %1121 = mul nuw nsw i32 %1119, %1120 > %1122 = add nuw nsw i32 %1121, %1118 > %1123 = add nuw nsw i32 %1122, 18 > %1124 = zext i32 %1123 to i64 > %1125 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1124 > %1126 = bitcast i32 addrspace(3)* %1125 to float addrspace(3)* > %1127 = load float, float addrspace(3)* %1126, align 4 > %1128 = and i32 %7, 8191 > %1129 = and i32 %10, 255 > %1130 = mul nuw nsw i32 %1128, %1129 > %1131 = add nuw nsw i32 %1130, 18 > %1132 = zext i32 %1131 to i64 > %1133 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1132 > %1134 = bitcast i32 addrspace(3)* %1133 to float addrspace(3)* > %1135 = load float, float addrspace(3)* %1134, align 4 > %1136 = fadd float %1127, %1135 > %1137 = fmul float %1096, 5.000000e-01 > %1138 = fmul float %1116, 5.000000e-01 > %1139 = fmul float %1136, 5.000000e-01 > %1140 = and i32 %7, 8191 > %1141 = and i32 %10, 255 > %1142 = mul nuw nsw i32 %1140, %1141 > %1143 = lshr i32 %7, 12 > %1144 = and i32 %1143, 510 > %1145 = add nuw nsw i32 %1142, %1144 > %1146 = add nuw nsw i32 %1145, 16 > %1147 = zext i32 %1146 to i64 > %1148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1147 > %1149 = bitcast i32 addrspace(3)* %1148 to float addrspace(3)* > %1150 = load float, float addrspace(3)* %1149, align 4 > %1151 = lshr i32 %7, 13 > %1152 = and i32 %1151, 255 > %1153 = and i32 %7, 8191 > %1154 = and i32 %10, 255 > %1155 = mul nuw nsw i32 %1153, %1154 > %1156 = add nuw nsw i32 %1155, %1152 > %1157 = add nuw nsw i32 %1156, 16 > %1158 = zext i32 %1157 to i64 > %1159 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1158 > %1160 = bitcast i32 addrspace(3)* %1159 to float addrspace(3)* > %1161 = load float, float addrspace(3)* %1160, align 4 > %1162 = fadd float %1150, %1161 > %1163 = and i32 %7, 8191 > %1164 = and i32 %10, 255 > %1165 = mul nuw nsw i32 %1163, %1164 > %1166 = lshr i32 %7, 12 > %1167 = and i32 %1166, 510 > %1168 = add nuw nsw i32 %1165, %1167 > %1169 = add nuw nsw i32 %1168, 17 > %1170 = zext i32 %1169 to i64 > %1171 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1170 > %1172 = bitcast i32 addrspace(3)* %1171 to float addrspace(3)* > %1173 = load float, float addrspace(3)* %1172, align 4 > %1174 = lshr i32 %7, 13 > %1175 = and i32 %1174, 255 > %1176 = and i32 %7, 8191 > %1177 = and i32 %10, 255 > %1178 = mul nuw nsw i32 %1176, %1177 > %1179 = add nuw nsw i32 %1178, %1175 > %1180 = add nuw nsw i32 %1179, 17 > %1181 = zext i32 %1180 to i64 > %1182 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %1181 > %1183 = bitcast i32 addrspace(3)* %1182 to float addrspace(3)* > %1184 = load float, float addrspace(3)* %1183, align 4 > %1185 = fadd float %1173, %1184 > %1186 = and i32 %7, 8191 > %1187 = and i32 %10, 255 > %1188 = mul nuw nsw i32 %1186, %1187 > %1189 = lshr i32 %7, 12 > %1190 = and i32 %1189, 510 > %1191 = add nuw nsw i32 %1188, %1190 > %1192 = add nuw nsw i32 %1191, 18 > %1193 = zext i32 %1192 to i64 > %1194 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1193 > %1195 = bitcast i32 addrspace(3)* %1194 to float addrspace(3)* > %1196 = load float, float addrspace(3)* %1195, align 4 > %1197 = lshr i32 %7, 13 > %1198 = and i32 %1197, 255 > %1199 = and i32 %7, 8191 > %1200 = and i32 %10, 255 > %1201 = mul nuw nsw i32 %1199, %1200 > %1202 = add nuw nsw i32 %1201, %1198 > %1203 = add nuw nsw i32 %1202, 18 > %1204 = zext i32 %1203 to i64 > %1205 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1204 > %1206 = bitcast i32 addrspace(3)* %1205 to float addrspace(3)* > %1207 = load float, float addrspace(3)* %1206, align 4 > %1208 = fadd float %1196, %1207 > %1209 = fmul float %1162, 5.000000e-01 > %1210 = fmul float %1185, 5.000000e-01 > %1211 = fmul float %1208, 5.000000e-01 > %1212 = and i32 %7, 8191 > %1213 = and i32 %10, 255 > %1214 = mul nuw nsw i32 %1212, %1213 > %1215 = add nuw nsw i32 %1214, 16 > %1216 = zext i32 %1215 to i64 > %1217 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1216 > %1218 = bitcast i32 addrspace(3)* %1217 to float addrspace(3)* > %1219 = load float, float addrspace(3)* %1218, align 4 > %1220 = and i32 %7, 8191 > %1221 = and i32 %10, 255 > %1222 = mul nuw nsw i32 %1220, %1221 > %1223 = lshr i32 %7, 12 > %1224 = and i32 %1223, 510 > %1225 = add nuw nsw i32 %1222, %1224 > %1226 = add nuw nsw i32 %1225, 16 > %1227 = zext i32 %1226 to i64 > %1228 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1227 > %1229 = bitcast i32 addrspace(3)* %1228 to float addrspace(3)* > %1230 = load float, float addrspace(3)* %1229, align 4 > %1231 = fadd float %1219, %1230 > %1232 = and i32 %7, 8191 > %1233 = and i32 %10, 255 > %1234 = mul nuw nsw i32 %1232, %1233 > %1235 = add nuw nsw i32 %1234, 17 > %1236 = zext i32 %1235 to i64 > %1237 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1236 > %1238 = bitcast i32 addrspace(3)* %1237 to float addrspace(3)* > %1239 = load float, float addrspace(3)* %1238, align 4 > %1240 = and i32 %7, 8191 > %1241 = and i32 %10, 255 > %1242 = mul nuw nsw i32 %1240, %1241 > %1243 = lshr i32 %7, 12 > %1244 = and i32 %1243, 510 > %1245 = add nuw nsw i32 %1242, %1244 > %1246 = add nuw nsw i32 %1245, 17 > %1247 = zext i32 %1246 to i64 > %1248 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1247 > %1249 = bitcast i32 addrspace(3)* %1248 to float addrspace(3)* > %1250 = load float, float addrspace(3)* %1249, align 4 > %1251 = fadd float %1239, %1250 > %1252 = and i32 %7, 8191 > %1253 = and i32 %10, 255 > %1254 = mul nuw nsw i32 %1252, %1253 > %1255 = add nuw nsw i32 %1254, 18 > %1256 = zext i32 %1255 to i64 > %1257 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1256 > %1258 = bitcast i32 addrspace(3)* %1257 to float addrspace(3)* > %1259 = load float, float addrspace(3)* %1258, align 4 > %1260 = and i32 %7, 8191 > %1261 = and i32 %10, 255 > %1262 = mul nuw nsw i32 %1260, %1261 > %1263 = lshr i32 %7, 12 > %1264 = and i32 
%1263, 510 > %1265 = add nuw nsw i32 %1262, %1264 > %1266 = add nuw nsw i32 %1265, 18 > %1267 = zext i32 %1266 to i64 > %1268 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1267 > %1269 = bitcast i32 addrspace(3)* %1268 to float addrspace(3)* > %1270 = load float, float addrspace(3)* %1269, align 4 > %1271 = fadd float %1259, %1270 > %1272 = fmul float %1231, 5.000000e-01 > %1273 = fmul float %1251, 5.000000e-01 > %1274 = fmul float %1271, 5.000000e-01 > %1275 = call float @llvm.fma.f32(float %54, float %1074, float %1137) > %1276 = call float @llvm.fma.f32(float %55, float %1074, float %1138) > %1277 = call float @llvm.fma.f32(float %56, float %1074, float %1139) > %1278 = call float @llvm.fma.f32(float %54, float %1075, float %1209) > %1279 = call float @llvm.fma.f32(float %55, float %1075, float %1210) > %1280 = call float @llvm.fma.f32(float %56, float %1075, float %1211) > %1281 = call float @llvm.fma.f32(float %54, float %1076, float %1272) > %1282 = call float @llvm.fma.f32(float %55, float %1076, float %1273) > %1283 = call float @llvm.fma.f32(float %56, float %1076, float %1274) > %1284 = fmul float %38, %1137 > %1285 = fmul float %39, %1138 > %1286 = fadd float %1284, %1285 > %1287 = fmul float %40, %1139 > %1288 = fadd float %1286, %1287 > %1289 = fadd float %1288, %41 > %1290 = fmul float %42, %1137 > %1291 = fmul float %43, %1138 > %1292 = fadd float %1290, %1291 > %1293 = fmul float %44, %1139 > %1294 = fadd float %1292, %1293 > %1295 = fadd float %1294, %45 > %1296 = fmul float %50, %1137 > %1297 = fmul float %51, %1138 > %1298 = fadd float %1296, %1297 > %1299 = fmul float %52, %1139 > %1300 = fadd float %1298, %1299 > %1301 = fadd float %1300, %53 > %1302 = fmul float %38, %1209 > %1303 = fmul float %39, %1210 > %1304 = fadd float %1302, %1303 > %1305 = fmul float %40, %1211 > %1306 = fadd float %1304, %1305 > %1307 = fadd float %1306, %41 > %1308 = fmul float %42, %1209 > %1309 = fmul float %43, %1210 > %1310 = fadd float %1308, %1309 > %1311 = fmul float %44, %1211 > %1312 = fadd float %1310, %1311 > %1313 = fadd float %1312, %45 > %1314 = fmul float %50, %1209 > %1315 = fmul float %51, %1210 > %1316 = fadd float %1314, %1315 > %1317 = fmul float %52, %1211 > %1318 = fadd float %1316, %1317 > %1319 = fadd float %1318, %53 > %1320 = fmul float %38, %1272 > %1321 = fmul float %39, %1273 > %1322 = fadd float %1320, %1321 > %1323 = fmul float %40, %1274 > %1324 = fadd float %1322, %1323 > %1325 = fadd float %1324, %41 > %1326 = fmul float %42, %1272 > %1327 = fmul float %43, %1273 > %1328 = fadd float %1326, %1327 > %1329 = fmul float %44, %1274 > %1330 = fadd float %1328, %1329 > %1331 = fadd float %1330, %45 > %1332 = fmul float %50, %1272 > %1333 = fmul float %51, %1273 > %1334 = fadd float %1332, %1333 > %1335 = fmul float %52, %1274 > %1336 = fadd float %1334, %1335 > %1337 = fadd float %1336, %53 > %1338 = fmul float %38, %1275 > %1339 = fmul float %39, %1276 > %1340 = fadd float %1338, %1339 > %1341 = fmul float %40, %1277 > %1342 = fadd float %1340, %1341 > %1343 = fadd float %1342, %41 > %1344 = fmul float %42, %1275 > %1345 = fmul float %43, %1276 > %1346 = fadd float %1344, %1345 > %1347 = fmul float %44, %1277 > %1348 = fadd float %1346, %1347 > %1349 = fadd float %1348, %45 > %1350 = fmul float %50, %1275 > %1351 = fmul float %51, %1276 > %1352 = fadd float %1350, %1351 > %1353 = fmul float %52, %1277 > %1354 = fadd float %1352, %1353 > %1355 = fadd float %1354, %53 > %1356 = fmul float %38, %1278 > %1357 = fmul float %39, %1279 
> %1358 = fadd float %1356, %1357 > %1359 = fmul float %40, %1280 > %1360 = fadd float %1358, %1359 > %1361 = fadd float %1360, %41 > %1362 = fmul float %42, %1278 > %1363 = fmul float %43, %1279 > %1364 = fadd float %1362, %1363 > %1365 = fmul float %44, %1280 > %1366 = fadd float %1364, %1365 > %1367 = fadd float %1366, %45 > %1368 = fmul float %50, %1278 > %1369 = fmul float %51, %1279 > %1370 = fadd float %1368, %1369 > %1371 = fmul float %52, %1280 > %1372 = fadd float %1370, %1371 > %1373 = fadd float %1372, %53 > %1374 = fmul float %38, %1281 > %1375 = fmul float %39, %1282 > %1376 = fadd float %1374, %1375 > %1377 = fmul float %40, %1283 > %1378 = fadd float %1376, %1377 > %1379 = fadd float %1378, %41 > %1380 = fmul float %42, %1281 > %1381 = fmul float %43, %1282 > %1382 = fadd float %1380, %1381 > %1383 = fmul float %44, %1283 > %1384 = fadd float %1382, %1383 > %1385 = fadd float %1384, %45 > %1386 = fmul float %50, %1281 > %1387 = fmul float %51, %1282 > %1388 = fadd float %1386, %1387 > %1389 = fmul float %52, %1283 > %1390 = fadd float %1388, %1389 > %1391 = fadd float %1390, %53 > %1392 = fcmp oeq float %1319, 0.000000e+00 > %1393 = fcmp oeq float %1319, 0.000000e+00 > %1394 = fcmp ogt float %1307, 0.000000e+00 > %1395 = select i1 %1394, float 1.000000e+00, float %1307 > %1396 = fcmp oge float %1395, 0.000000e+00 > %1397 = fcmp ogt float %1313, 0.000000e+00 > %1398 = select i1 %1397, float 1.000000e+00, float %1313 > %1399 = fcmp oge float %1398, 0.000000e+00 > %.op = fmul float %1395, 0x4600000000000000 > %1400 = select i1 %1396, float %.op, float 0xC600000000000000 > %.op80 = fmul float %1398, 0x4600000000000000 > %1401 = select i1 %1399, float %.op80, float 0xC600000000000000 > %1402 = fdiv float 1.000000e+00, %1319 > %1403 = fmul float %1307, %1402 > %1404 = fmul float %1313, %1402 > %1405 = select i1 %1392, float %1400, float %1403 > %1406 = select i1 %1393, float %1401, float %1404 > %1407 = fcmp oeq float %1337, 0.000000e+00 > %1408 = fcmp oeq float %1337, 0.000000e+00 > %1409 = fcmp ogt float %1325, 0.000000e+00 > %1410 = select i1 %1409, float 1.000000e+00, float %1325 > %1411 = fcmp oge float %1410, 0.000000e+00 > %1412 = fcmp ogt float %1331, 0.000000e+00 > %1413 = select i1 %1412, float 1.000000e+00, float %1331 > %1414 = fcmp oge float %1413, 0.000000e+00 > %.op81 = fmul float %1410, 0x4600000000000000 > %1415 = select i1 %1411, float %.op81, float 0xC600000000000000 > %.op82 = fmul float %1413, 0x4600000000000000 > %1416 = select i1 %1414, float %.op82, float 0xC600000000000000 > %1417 = fdiv float 1.000000e+00, %1337 > %1418 = fmul float %1325, %1417 > %1419 = fmul float %1331, %1417 > %1420 = select i1 %1407, float %1415, float %1418 > %1421 = select i1 %1408, float %1416, float %1419 > %1422 = fcmp oeq float %1355, 0.000000e+00 > %1423 = fcmp oeq float %1355, 0.000000e+00 > %1424 = fcmp ogt float %1343, 0.000000e+00 > %1425 = select i1 %1424, float 1.000000e+00, float %1343 > %1426 = fcmp oge float %1425, 0.000000e+00 > %1427 = fcmp ogt float %1349, 0.000000e+00 > %1428 = select i1 %1427, float 1.000000e+00, float %1349 > %1429 = fcmp oge float %1428, 0.000000e+00 > %.op83 = fmul float %1425, 0x4600000000000000 > %1430 = select i1 %1426, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %1428, 0x4600000000000000 > %1431 = select i1 %1429, float %.op84, float 0xC600000000000000 > %1432 = fdiv float 1.000000e+00, %1355 > %1433 = fmul float %1343, %1432 > %1434 = fmul float %1349, %1432 > %1435 = select i1 %1422, float %1430, float %1433 > %1436 = 
select i1 %1423, float %1431, float %1434 > %1437 = fcmp oeq float %1301, 0.000000e+00 > %1438 = fcmp oeq float %1301, 0.000000e+00 > %1439 = fcmp ogt float %1289, 0.000000e+00 > %1440 = select i1 %1439, float 1.000000e+00, float %1289 > %1441 = fcmp oge float %1440, 0.000000e+00 > %1442 = fcmp ogt float %1295, 0.000000e+00 > %1443 = select i1 %1442, float 1.000000e+00, float %1295 > %1444 = fcmp oge float %1443, 0.000000e+00 > %.op85 = fmul float %1440, 0x4600000000000000 > %1445 = select i1 %1441, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %1443, 0x4600000000000000 > %1446 = select i1 %1444, float %.op86, float 0xC600000000000000 > %1447 = fdiv float 1.000000e+00, %1301 > %1448 = fmul float %1289, %1447 > %1449 = fmul float %1295, %1447 > %1450 = select i1 %1437, float %1445, float %1448 > %1451 = select i1 %1438, float %1446, float %1449 > %1452 = fsub float %1450, %1435 > %1453 = fsub float %1451, %1436 > %1454 = fcmp oeq float %1373, 0.000000e+00 > %1455 = fcmp oeq float %1373, 0.000000e+00 > %1456 = fcmp ogt float %1361, 0.000000e+00 > %1457 = select i1 %1456, float 1.000000e+00, float %1361 > %1458 = fcmp oge float %1457, 0.000000e+00 > %1459 = fcmp ogt float %1367, 0.000000e+00 > %1460 = select i1 %1459, float 1.000000e+00, float %1367 > %1461 = fcmp oge float %1460, 0.000000e+00 > %.op87 = fmul float %1457, 0x4600000000000000 > %1462 = select i1 %1458, float %.op87, float 0xC600000000000000 > %.op88 = fmul float %1460, 0x4600000000000000 > %1463 = select i1 %1461, float %.op88, float 0xC600000000000000 > %1464 = fdiv float 1.000000e+00, %1373 > %1465 = fmul float %1361, %1464 > %1466 = fmul float %1367, %1464 > %1467 = select i1 %1454, float %1462, float %1465 > %1468 = select i1 %1455, float %1463, float %1466 > %1469 = fsub float %1405, %1467 > %1470 = fsub float %1406, %1468 > %1471 = fmul float %1469, %57 > %1472 = fmul float %1470, %58 > %1473 = fcmp oeq float %1391, 0.000000e+00 > %1474 = fcmp oeq float %1391, 0.000000e+00 > %1475 = fcmp ogt float %1379, 0.000000e+00 > %1476 = select i1 %1475, float 1.000000e+00, float %1379 > %1477 = fcmp oge float %1476, 0.000000e+00 > %1478 = fcmp ogt float %1385, 0.000000e+00 > %1479 = select i1 %1478, float 1.000000e+00, float %1385 > %1480 = fcmp oge float %1479, 0.000000e+00 > %.op89 = fmul float %1476, 0x4600000000000000 > %1481 = select i1 %1477, float %.op89, float 0xC600000000000000 > %.op90 = fmul float %1479, 0x4600000000000000 > %1482 = select i1 %1480, float %.op90, float 0xC600000000000000 > %1483 = fdiv float 1.000000e+00, %1391 > %1484 = fmul float %1379, %1483 > %1485 = fmul float %1385, %1483 > %1486 = select i1 %1473, float %1481, float %1484 > %1487 = select i1 %1474, float %1482, float %1485 > %1488 = fsub float %1420, %1486 > %1489 = fsub float %1421, %1487 > %1490 = fmul float %1488, %57 > %1491 = fmul float %1452, %57 > %1492 = fmul float %1453, %58 > %1493 = fmul float %1489, %58 > %1494 = fmul float %1491, %1491 > %1495 = fmul float %1492, %1492 > %1496 = fadd float %1494, %1495 > %1497 = fmul float %1471, %1471 > %1498 = fmul float %1472, %1472 > %1499 = fadd float %1497, %1498 > %1500 = fmul float %1490, %1490 > %1501 = fmul float %1493, %1493 > %1502 = fadd float %1500, %1501 > %1503 = call float @llvm.sqrt.f32(float %1502) > %1504 = call float @llvm.sqrt.f32(float %1496) > %1505 = call float @llvm.sqrt.f32(float %1499) > %1506 = fsub float %1301, %30 > %1507 = fsub float %1319, %30 > %1508 = fsub float %1337, %30 > %1509 = fcmp une float %31, 0.000000e+00 > br i1 %1509, label %IF69, label 
%ELSE70 > >ENDIF: ; preds = %main_body, %ENDIF77 > %temp15.0 = phi float [ %31, %ENDIF77 ], [ %53, %main_body ] > %temp16.0 = phi float [ %1723, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp17.0 = phi float [ %1724, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp18.0 = phi float [ %1713, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp19.0 = phi float [ %1726, %ENDIF77 ], [ 0.000000e+00, %main_body ] > %temp14.0 = phi float [ %30, %ENDIF77 ], [ %52, %main_body ] > %temp13.0 = phi float [ %1706, %ENDIF77 ], [ %51, %main_body ] > %temp11.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %24, %main_body ] > %temp10.0 = phi float [ %1505, %ENDIF77 ], [ %23, %main_body ] > %temp9.0 = phi float [ %1698, %ENDIF77 ], [ %22, %main_body ] > %temp7.0 = phi float [ 1.000000e+00, %ENDIF77 ], [ %919, %main_body ] > %temp6.0 = phi float [ %1139, %ENDIF77 ], [ %850, %main_body ] > %temp5.0 = phi float [ %1693, %ENDIF77 ], [ %830, %main_body ] > %1510 = call float @llvm.minnum.f32(float %temp16.0, float 6.300000e+01) > %1511 = call float @llvm.minnum.f32(float %temp17.0, float 6.300000e+01) > %1512 = call float @llvm.minnum.f32(float %temp18.0, float 6.300000e+01) > %1513 = call float @llvm.minnum.f32(float %temp19.0, float 6.300000e+01) > %1514 = lshr i32 %5, 16 > %1515 = shl nuw nsw i32 %1514, 2 > %1516 = and i32 %6, 8191 > %1517 = and i32 %10, 255 > %1518 = mul nuw nsw i32 %1516, %1517 > %1519 = add nuw nsw i32 %1515, %1518 > %1520 = add nuw nsw i32 %1519, 8 > %1521 = zext i32 %1520 to i64 > %1522 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1521 > %1523 = bitcast i32 addrspace(3)* %1522 to float addrspace(3)* > store float %1510, float addrspace(3)* %1523, align 4 > %1524 = add nuw nsw i32 %1519, 9 > %1525 = zext i32 %1524 to i64 > %1526 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1525 > %1527 = bitcast i32 addrspace(3)* %1526 to float addrspace(3)* > store float %temp5.0, float addrspace(3)* %1527, align 4 > %1528 = add nuw nsw i32 %1519, 10 > %1529 = zext i32 %1528 to i64 > %1530 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1529 > %1531 = bitcast i32 addrspace(3)* %1530 to float addrspace(3)* > store float %temp6.0, float addrspace(3)* %1531, align 4 > %1532 = add nuw nsw i32 %1519, 11 > %1533 = zext i32 %1532 to i64 > %1534 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1533 > %1535 = bitcast i32 addrspace(3)* %1534 to float addrspace(3)* > store float %temp7.0, float addrspace(3)* %1535, align 4 > %1536 = lshr i32 %5, 16 > %1537 = shl nuw nsw i32 %1536, 2 > %1538 = and i32 %6, 8191 > %1539 = and i32 %10, 255 > %1540 = mul nuw nsw i32 %1538, %1539 > %1541 = add nuw nsw i32 %1537, %1540 > %1542 = add nuw nsw i32 %1541, 12 > %1543 = zext i32 %1542 to i64 > %1544 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1543 > %1545 = bitcast i32 addrspace(3)* %1544 to float addrspace(3)* > store float %1511, float addrspace(3)* %1545, align 4 > %1546 = add nuw nsw i32 %1541, 13 > %1547 = zext i32 %1546 to i64 > %1548 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1547 > %1549 = bitcast i32 addrspace(3)* %1548 to float addrspace(3)* > store float %temp9.0, float addrspace(3)* %1549, align 4 > %1550 = add nuw nsw i32 %1541, 14 > %1551 = zext i32 %1550 to i64 > %1552 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1551 > %1553 = bitcast i32 addrspace(3)* %1552 to float addrspace(3)* > 
store float %temp10.0, float addrspace(3)* %1553, align 4 > %1554 = add nuw nsw i32 %1541, 15 > %1555 = zext i32 %1554 to i64 > %1556 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1555 > %1557 = bitcast i32 addrspace(3)* %1556 to float addrspace(3)* > store float %temp11.0, float addrspace(3)* %1557, align 4 > %1558 = lshr i32 %5, 16 > %1559 = shl nuw nsw i32 %1558, 2 > %1560 = and i32 %6, 8191 > %1561 = and i32 %10, 255 > %1562 = mul nuw nsw i32 %1560, %1561 > %1563 = add nuw nsw i32 %1559, %1562 > %1564 = add nuw nsw i32 %1563, 16 > %1565 = zext i32 %1564 to i64 > %1566 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1565 > %1567 = bitcast i32 addrspace(3)* %1566 to float addrspace(3)* > store float %1512, float addrspace(3)* %1567, align 4 > %1568 = add nuw nsw i32 %1563, 17 > %1569 = zext i32 %1568 to i64 > %1570 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1569 > %1571 = bitcast i32 addrspace(3)* %1570 to float addrspace(3)* > store float %temp13.0, float addrspace(3)* %1571, align 4 > %1572 = add nuw nsw i32 %1563, 18 > %1573 = zext i32 %1572 to i64 > %1574 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1573 > %1575 = bitcast i32 addrspace(3)* %1574 to float addrspace(3)* > store float %temp14.0, float addrspace(3)* %1575, align 4 > %1576 = add nuw nsw i32 %1563, 19 > %1577 = zext i32 %1576 to i64 > %1578 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1577 > %1579 = bitcast i32 addrspace(3)* %1578 to float addrspace(3)* > store float %temp15.0, float addrspace(3)* %1579, align 4 > %1580 = lshr i32 %5, 16 > %1581 = shl nuw nsw i32 %1580, 2 > %1582 = and i32 %6, 8191 > %1583 = and i32 %10, 255 > %1584 = mul nuw nsw i32 %1582, %1583 > %1585 = add nuw nsw i32 %1581, %1584 > %1586 = add nuw nsw i32 %1585, 20 > %1587 = zext i32 %1586 to i64 > %1588 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1587 > %1589 = bitcast i32 addrspace(3)* %1588 to float addrspace(3)* > store float %1513, float addrspace(3)* %1589, align 4 > %1590 = add nuw nsw i32 %1585, 21 > %1591 = zext i32 %1590 to i64 > %1592 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1591 > %1593 = bitcast i32 addrspace(3)* %1592 to float addrspace(3)* > store float %1511, float addrspace(3)* %1593, align 4 > %1594 = add nuw nsw i32 %1585, 22 > %1595 = zext i32 %1594 to i64 > %1596 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1595 > %1597 = bitcast i32 addrspace(3)* %1596 to float addrspace(3)* > store float %1512, float addrspace(3)* %1597, align 4 > %1598 = add nuw nsw i32 %1585, 23 > %1599 = zext i32 %1598 to i64 > %1600 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1599 > %1601 = bitcast i32 addrspace(3)* %1600 to float addrspace(3)* > store float %1513, float addrspace(3)* %1601, align 4 > %1602 = lshr i32 %5, 16 > %1603 = shl nuw nsw i32 %1602, 2 > %1604 = and i32 %6, 8191 > %1605 = and i32 %10, 255 > %1606 = mul nuw nsw i32 %1604, %1605 > %1607 = add nuw nsw i32 %1603, %1606 > %1608 = zext i32 %1607 to i64 > %1609 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1608 > %1610 = bitcast i32 addrspace(3)* %1609 to float addrspace(3)* > store float %1510, float addrspace(3)* %1610, align 4 > %1611 = lshr i32 %5, 16 > %1612 = shl nuw nsw i32 %1611, 2 > %1613 = and i32 %6, 8191 > %1614 = and i32 %10, 255 > 
%1615 = mul nuw nsw i32 %1613, %1614 > %1616 = add nuw nsw i32 %1612, %1615 > %1617 = add nuw nsw i32 %1616, 1 > %1618 = zext i32 %1617 to i64 > %1619 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1618 > %1620 = bitcast i32 addrspace(3)* %1619 to float addrspace(3)* > store float %1511, float addrspace(3)* %1620, align 4 > %1621 = lshr i32 %5, 16 > %1622 = shl nuw nsw i32 %1621, 2 > %1623 = and i32 %6, 8191 > %1624 = and i32 %10, 255 > %1625 = mul nuw nsw i32 %1623, %1624 > %1626 = add nuw nsw i32 %1622, %1625 > %1627 = add nuw nsw i32 %1626, 2 > %1628 = zext i32 %1627 to i64 > %1629 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1628 > %1630 = bitcast i32 addrspace(3)* %1629 to float addrspace(3)* > store float %1512, float addrspace(3)* %1630, align 4 > %1631 = lshr i32 %5, 16 > %1632 = shl nuw nsw i32 %1631, 2 > %1633 = and i32 %6, 8191 > %1634 = and i32 %10, 255 > %1635 = mul nuw nsw i32 %1633, %1634 > %1636 = add nuw nsw i32 %1632, %1635 > %1637 = add nuw nsw i32 %1636, 4 > %1638 = zext i32 %1637 to i64 > %1639 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %1638 > %1640 = bitcast i32 addrspace(3)* %1639 to float addrspace(3)* > store float %1513, float addrspace(3)* %1640, align 4 > %1641 = and i32 %10, 255 > %1642 = lshr i32 %10, 8 > %1643 = and i32 %1642, 31 > %1644 = lshr i32 %5, 16 > %1645 = shl nuw nsw i32 %1644, 2 > %1646 = and i32 %6, 8191 > %1647 = and i32 %10, 255 > %1648 = mul nuw nsw i32 %1646, %1647 > %1649 = add nuw nsw i32 %1645, %1648 > %1650 = ptrtoint [16 x <16 x i8>] addrspace(2)* %0 to i64 > %1651 = bitcast i64 %1650 to <2 x i32> > %1652 = extractelement <2 x i32> %1651, i32 0 > %1653 = extractelement <2 x i32> %1651, i32 1 > %1654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> undef, i32 %1652, 0 > %1655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1654, i32 %1653, 1 > %1656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1655, i32 %8, 13 > %1657 = bitcast i32 %1641 to float > %1658 = bitcast i32 %1643 to float > %1659 = bitcast i32 %1649 to float > %1660 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1656, float %1657, 14 > %1661 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1660, float %1658, 15 > %1662 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1661, float %1659, 16 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float }> %1662 > >IF69: ; preds = %IF > %1663 = fdiv float 1.000000e+00, %31 > %1664 = fmul float %1506, %1663 > br label %ENDIF68 > >ELSE70: ; preds = %IF > %1665 = fcmp ogt float %1506, 0.000000e+00 > %1666 = select i1 %1665, float 1.000000e+00, float %1506 > %1667 = fcmp oge float %1666, 0.000000e+00 > %.op91 = fmul float %1666, 0x4600000000000000 > %1668 = select i1 %1667, float %.op91, float 0xC600000000000000 > br label %ENDIF68 > >ENDIF68: ; preds = %ELSE70, %IF69 > %temp12.0 = phi float [ %1664, %IF69 ], [ %1668, %ELSE70 ] > %1669 = call float @llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %1670 = fsub float 1.000000e+00, %1669 > %1671 = fmul float %1670, %1504 > %1672 = fcmp une float %31, 
0.000000e+00 > br i1 %1672, label %IF72, label %ELSE73 > >IF72: ; preds = %ENDIF68 > %1673 = fdiv float 1.000000e+00, %31 > %1674 = fmul float %1507, %1673 > br label %ENDIF71 > >ELSE73: ; preds = %ENDIF68 > %1675 = fcmp ogt float %1507, 0.000000e+00 > %1676 = select i1 %1675, float 1.000000e+00, float %1507 > %1677 = fcmp oge float %1676, 0.000000e+00 > %.op92 = fmul float %1676, 0x4600000000000000 > %1678 = select i1 %1677, float %.op92, float 0xC600000000000000 > br label %ENDIF71 > >ENDIF71: ; preds = %ELSE73, %IF72 > %temp12.1 = phi float [ %1674, %IF72 ], [ %1678, %ELSE73 ] > %1679 = call float @llvm.AMDGPU.clamp.(float %temp12.1, float 0.000000e+00, float 1.000000e+00) > %1680 = fsub float 1.000000e+00, %1679 > %1681 = fmul float %1680, %1505 > %1682 = fcmp une float %31, 0.000000e+00 > br i1 %1682, label %IF75, label %ELSE76 > >IF75: ; preds = %ENDIF71 > %1683 = fdiv float 1.000000e+00, %31 > %1684 = fmul float %1508, %1683 > br label %ENDIF74 > >ELSE76: ; preds = %ENDIF71 > %1685 = fcmp ogt float %1508, 0.000000e+00 > %1686 = select i1 %1685, float 1.000000e+00, float %1508 > %1687 = fcmp oge float %1686, 0.000000e+00 > %.op93 = fmul float %1686, 0x4600000000000000 > %1688 = select i1 %1687, float %.op93, float 0xC600000000000000 > br label %ENDIF74 > >ENDIF74: ; preds = %ELSE76, %IF75 > %temp12.2 = phi float [ %1684, %IF75 ], [ %1688, %ELSE76 ] > %1689 = call float @llvm.AMDGPU.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) > %1690 = fsub float 1.000000e+00, %1689 > %1691 = fmul float %1690, %1503 > %1692 = fmul float %28, %34 > %1693 = fmul float %29, %35 > %1694 = call float @llvm.maxnum.f32(float %1693, float 1.000000e+00) > %1695 = fcmp oeq float %1692, 0.000000e+00 > %1696 = fcmp oeq float %1692, 0.000000e+00 > %1697 = sext i1 %1696 to i32 > %1698 = bitcast i32 %1697 to float > %1699 = fcmp ogt float %1691, 0.000000e+00 > %1700 = select i1 %1699, float 1.000000e+00, float %1691 > %1701 = fcmp oge float %1700, 0.000000e+00 > %1702 = fcmp ogt float %1671, 0.000000e+00 > %1703 = select i1 %1702, float 1.000000e+00, float %1671 > %1704 = fcmp oge float %1703, 0.000000e+00 > %.op94 = fmul float %1700, 0x4600000000000000 > %1705 = select i1 %1701, float %.op94, float 0xC600000000000000 > %.op95 = fmul float %1703, 0x4600000000000000 > %1706 = select i1 %1704, float %.op95, float 0xC600000000000000 > %1707 = fdiv float 1.000000e+00, %1692 > %1708 = fmul float %1691, %1707 > %1709 = fmul float %1671, %1707 > %1710 = select i1 %1695, float %1705, float %1708 > %1711 = select i1 %1696, float %1706, float %1709 > %1712 = call float @llvm.maxnum.f32(float %1711, float 1.000000e+00) > %1713 = call float @llvm.minnum.f32(float %1694, float %1712) > %1714 = fcmp une float %1692, 0.000000e+00 > br i1 %1714, label %IF78, label %ELSE79 > >IF78: ; preds = %ENDIF74 > %1715 = fdiv float 1.000000e+00, %1692 > %1716 = fmul float %1681, %1715 > br label %ENDIF77 > >ELSE79: ; preds = %ENDIF74 > %1717 = fcmp ogt float %1681, 0.000000e+00 > %1718 = select i1 %1717, float 1.000000e+00, float %1681 > %1719 = fcmp oge float %1718, 0.000000e+00 > %.op96 = fmul float %1718, 0x4600000000000000 > %1720 = select i1 %1719, float %.op96, float 0xC600000000000000 > br label %ENDIF77 > >ENDIF77: ; preds = %ELSE79, %IF78 > %temp4.0 = phi float [ %1716, %IF78 ], [ %1720, %ELSE79 ] > %1721 = call float @llvm.maxnum.f32(float %1710, float 1.000000e+00) > %1722 = call float @llvm.maxnum.f32(float %temp4.0, float 1.000000e+00) > %1723 = call float @llvm.minnum.f32(float %1694, float %1722) > %1724 = 
call float @llvm.minnum.f32(float %1694, float %1721) > %1725 = call float @llvm.maxnum.f32(float %1713, float %1724) > %1726 = call float @llvm.maxnum.f32(float %1725, float %1723) > br label %ENDIF >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > as_es = 0 > export_prim_id = 0 >TESS_EVAL >PROPERTY TES_PRIM_MODE 4 >PROPERTY TES_SPACING 0 >PROPERTY TES_VERTEX_ORDER_CW 0 >PROPERTY TES_POINT_MODE 0 >PROPERTY NEXT_SHADER 1 >DCL IN[][0..5], ARRAY(1), GENERIC[0] >DCL SV[0], TESSCOORD >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..23] >DCL CONST[2][0..3] >DCL TEMP[0..7], LOCAL >IMM[0] UINT32 {0, 176, 112, 128} >IMM[1] FLT32 { 1.0000, 0.0000, 158456325028528675187087900672.0000, 0.0000} >IMM[2] UINT32 {144, 368, 352, 1} >IMM[3] UINT32 {16, 32, 48, 0} > 0: MUL TEMP[0].xyz, SV[0].yyyy, IN[1][0].xyzz > 1: FMA TEMP[0].xyz, SV[0].xxxx, IN[0][0].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, SV[0].zzzz, IN[2][0].xyzz, TEMP[0].xyzz > 3: MOV TEMP[2].xyz, CONST[1][11] > 4: MOV TEMP[2].xyz, TEMP[2].xyzx > 5: MOV TEMP[2].w, IMM[1].xxxx > 6: MOV TEMP[3], CONST[1][7] > 7: DP4 TEMP[3].x, TEMP[3], TEMP[2] > 8: MOV TEMP[4], CONST[1][8] > 9: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 10: MOV TEMP[3].y, TEMP[4].xxxx > 11: MOV TEMP[4], CONST[1][9] > 12: DP4 TEMP[4].x, TEMP[4], TEMP[2] > 13: MOV TEMP[3].z, TEMP[4].xxxx > 14: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[3].xyzz > 15: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz > 16: MOV TEMP[0].w, TEMP[4].xxxx > 17: SQRT TEMP[4].x, TEMP[0].wwww > 18: MOV TEMP[5].w, CONST[1][23] > 19: FSNE TEMP[5].x, TEMP[5].wwww, IMM[1].yyyy > 20: UIF TEMP[5].xxxx :0 > 21: MOV TEMP[5].w, CONST[1][23] > 22: RCP TEMP[5].x, TEMP[5].wwww > 23: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx > 24: ELSE :0 > 25: SSG TEMP[6].x, TEMP[4].xxxx > 26: MUL TEMP[5].x, IMM[1].zzzz, TEMP[6].xxxx > 27: ENDIF > 28: MOV TEMP[6].z, CONST[1][22] > 29: ADD TEMP[4].x, TEMP[4].xxxx, -TEMP[6].zzzz > 30: MOV TEMP[0].w, TEMP[4].xxxx > 31: MOV TEMP[4].w, CONST[1][22] > 32: FSNE TEMP[4].x, TEMP[4].wwww, IMM[1].yyyy > 33: UIF TEMP[4].xxxx :0 > 34: MOV TEMP[4].w, CONST[1][22] > 35: RCP TEMP[4].x, TEMP[4].wwww > 36: MUL TEMP[4].x, TEMP[0].wwww, TEMP[4].xxxx > 37: ELSE :0 > 38: SSG TEMP[6].x, TEMP[0].wwww > 39: MUL TEMP[4].x, IMM[1].zzzz, TEMP[6].xxxx > 40: ENDIF > 41: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 42: ADD TEMP[4].x, -TEMP[4].xxxx, IMM[1].xxxx > 43: MOV TEMP[0].w, TEMP[4].xxxx > 44: MOV TEMP[4].z, CONST[1][23] > 45: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[4].zzzz > 46: MUL TEMP[4].xy, SV[0].yyyy, IN[1][1].xyyy > 47: MOV 
TEMP[2].yz, TEMP[4].yxyy > 48: FMA TEMP[4].xy, SV[0].xxxx, IN[0][1].xyyy, TEMP[2].yzzz > 49: MOV TEMP[2].yz, TEMP[4].yxyy > 50: FMA TEMP[4].xy, SV[0].zzzz, IN[2][1].xyyy, TEMP[2].yzzz > 51: MOV TEMP[2].yz, TEMP[4].yxyy > 52: MOV TEMP[4].xy, TEMP[2].yzzz > 53: MOV TEMP[4].w, TEMP[2].xxxx > 54: TXL TEMP[4], TEMP[4], SAMP[0], 2D > 55: MOV TEMP[4].x, TEMP[4] > 56: MOV TEMP[5].xy, TEMP[2].yzyy > 57: MOV TEMP[6].y, CONST[1][23] > 58: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[6].yyyy > 59: MOV TEMP[4].x, CONST[1][23] > 60: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 61: MUL TEMP[4].x, TEMP[0].wwww, TEMP[2].xxxx > 62: MOV TEMP[0].w, TEMP[4].xxxx > 63: MUL TEMP[4].x, SV[0].xxxx, IN[0][4].zzzz > 64: MOV TEMP[2].w, TEMP[4].xxxx > 65: MUL TEMP[2].xyz, SV[0].xxxx, IN[0][3].xyzz > 66: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][3].xyzz > 67: MUL TEMP[4].xyz, SV[0].yyyy, IN[1][4].xyzz > 68: MOV TEMP[3].w, TEMP[4].zzzz > 69: FMA TEMP[4].xy, SV[0].xxxx, IN[0][4].xyyy, TEMP[4].xyyy > 70: FMA TEMP[4].xy, SV[0].zzzz, IN[2][4].xyyy, TEMP[4].xyyy > 71: ADD TEMP[2], TEMP[2], TEMP[3] > 72: MUL TEMP[6].x, SV[0].zzzz, IN[2][4].zzzz > 73: MOV TEMP[3].w, TEMP[6].xxxx > 74: MUL TEMP[3].xyz, SV[0].zzzz, IN[2][3].xyzz > 75: ADD TEMP[2], TEMP[2].zwxy, TEMP[3].zwxy > 76: MOV TEMP[6].xyz, TEMP[2].zwxz > 77: MUL TEMP[3].xyz, SV[0].yyyy, IN[1][5].xyzz > 78: FMA TEMP[3].xyz, SV[0].xxxx, IN[0][5].xyzz, TEMP[3].xyzz > 79: FMA TEMP[3].xyz, SV[0].zzzz, IN[2][5].xyzz, TEMP[3].xyzz > 80: MOV TEMP[2].z, TEMP[3].zzzz > 81: MOV TEMP[3].xyz, TEMP[3].xyzx > 82: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz > 83: MOV TEMP[2].w, TEMP[7].xxxx > 84: RSQ TEMP[7].x, TEMP[2].wwww > 85: MUL TEMP[7].xyz, TEMP[7].xxxx, TEMP[2].xyzz > 86: MOV TEMP[2].xzw, TEMP[7].xxyz > 87: MOV TEMP[4].z, TEMP[2].yyyy > 88: FMA TEMP[0].xyz, TEMP[2].xzww, TEMP[0].wwww, TEMP[1].xyzz > 89: MOV TEMP[0].w, IMM[1].xxxx > 90: MOV TEMP[1], CONST[2][0] > 91: DP4 TEMP[1].x, TEMP[1], TEMP[0] > 92: MOV TEMP[2], CONST[2][1] > 93: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 94: MOV TEMP[1].y, TEMP[2].xxxx > 95: MOV TEMP[2], CONST[2][2] > 96: DP4 TEMP[2].x, TEMP[2], TEMP[0] > 97: MOV TEMP[1].z, TEMP[2].xxxx > 98: MOV TEMP[2], CONST[2][3] > 99: DP4 TEMP[2].x, TEMP[2], TEMP[0] >100: MOV TEMP[1].w, TEMP[2].xxxx >101: MUL TEMP[0], SV[0].yyyy, IN[1][2] >102: FMA TEMP[0], SV[0].xxxx, IN[0][2], TEMP[0] >103: FMA TEMP[0], SV[0].zzzz, IN[2][2], TEMP[0] >104: MOV OUT[5], TEMP[0] >105: MOV OUT[4], TEMP[3] >106: MOV OUT[2], TEMP[6] >107: MOV OUT[3], TEMP[4] >108: MOV OUT[1], TEMP[5] >109: MOV OUT[0], TEMP[1] >110: END >radeonsi: Compiling shader 334 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >@tess_lds = external addrspace(3) global [8320 x i32] > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, float, float, i32, i32) { >main_body: > %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 > %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) > %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) > %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) > %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) > %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) > %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 
132) > %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) > %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) > %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) > %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) > %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) > %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) > %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) > %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) > %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) > %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) > %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 > %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 0) > %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 4) > %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 8) > %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 12) > %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 16) > %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 20) > %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 24) > %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 28) > %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 32) > %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 36) > %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 40) > %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 44) > %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 48) > %49 = call float @llvm.SI.load.const(<16 x i8> %35, i32 52) > %50 = call float @llvm.SI.load.const(<16 x i8> %35, i32 56) > %51 = call float @llvm.SI.load.const(<16 x i8> %35, i32 60) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = fadd float %7, %8 > %62 = fsub float 1.000000e+00, %61 > %63 = lshr i32 %6, 13 > %64 = and i32 %63, 255 > %65 = shl i32 %5, 2 > %66 = and i32 %65, 262140 > %67 = and i32 %6, 8191 > %68 = mul i32 %67, %9 > %69 = add i32 %66, %68 > %70 = add i32 %69, %64 > %71 = add i32 %70, 16 > %72 = sext i32 %71 to i64 > %73 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %72 > %74 = bitcast i32 addrspace(3)* %73 to float addrspace(3)* > %75 = load float, float addrspace(3)* %74, align 4 > %76 = fmul float %75, %8 > %77 = lshr i32 %6, 13 > %78 = and i32 %77, 255 > %79 = shl i32 %5, 2 > %80 = and i32 %79, 262140 > %81 = and i32 %6, 8191 > %82 = mul i32 %81, %9 > %83 = add i32 %80, %82 > %84 = add i32 %83, %78 > %85 = add i32 %84, 17 > %86 = sext i32 %85 to i64 > %87 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %86 > %88 
= bitcast i32 addrspace(3)* %87 to float addrspace(3)* > %89 = load float, float addrspace(3)* %88, align 4 > %90 = fmul float %89, %8 > %91 = lshr i32 %6, 13 > %92 = and i32 %91, 255 > %93 = shl i32 %5, 2 > %94 = and i32 %93, 262140 > %95 = and i32 %6, 8191 > %96 = mul i32 %95, %9 > %97 = add i32 %94, %96 > %98 = add i32 %97, %92 > %99 = add i32 %98, 18 > %100 = sext i32 %99 to i64 > %101 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %100 > %102 = bitcast i32 addrspace(3)* %101 to float addrspace(3)* > %103 = load float, float addrspace(3)* %102, align 4 > %104 = fmul float %103, %8 > %105 = shl i32 %5, 2 > %106 = and i32 %105, 262140 > %107 = and i32 %6, 8191 > %108 = mul i32 %107, %9 > %109 = add i32 %106, %108 > %110 = add i32 %109, 16 > %111 = sext i32 %110 to i64 > %112 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %111 > %113 = bitcast i32 addrspace(3)* %112 to float addrspace(3)* > %114 = load float, float addrspace(3)* %113, align 4 > %115 = call float @llvm.fma.f32(float %7, float %114, float %76) > %116 = shl i32 %5, 2 > %117 = and i32 %116, 262140 > %118 = and i32 %6, 8191 > %119 = mul i32 %118, %9 > %120 = add i32 %117, %119 > %121 = add i32 %120, 17 > %122 = sext i32 %121 to i64 > %123 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %122 > %124 = bitcast i32 addrspace(3)* %123 to float addrspace(3)* > %125 = load float, float addrspace(3)* %124, align 4 > %126 = call float @llvm.fma.f32(float %7, float %125, float %90) > %127 = shl i32 %5, 2 > %128 = and i32 %127, 262140 > %129 = and i32 %6, 8191 > %130 = mul i32 %129, %9 > %131 = add i32 %128, %130 > %132 = add i32 %131, 18 > %133 = sext i32 %132 to i64 > %134 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %133 > %135 = bitcast i32 addrspace(3)* %134 to float addrspace(3)* > %136 = load float, float addrspace(3)* %135, align 4 > %137 = call float @llvm.fma.f32(float %7, float %136, float %104) > %138 = shl i32 %5, 2 > %139 = and i32 %138, 262140 > %140 = and i32 %6, 8191 > %141 = mul i32 %140, %9 > %142 = add i32 %139, %141 > %143 = lshr i32 %6, 12 > %144 = and i32 %143, 510 > %145 = add i32 %142, %144 > %146 = add i32 %145, 16 > %147 = sext i32 %146 to i64 > %148 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %147 > %149 = bitcast i32 addrspace(3)* %148 to float addrspace(3)* > %150 = load float, float addrspace(3)* %149, align 4 > %151 = call float @llvm.fma.f32(float %62, float %150, float %115) > %152 = shl i32 %5, 2 > %153 = and i32 %152, 262140 > %154 = and i32 %6, 8191 > %155 = mul i32 %154, %9 > %156 = add i32 %153, %155 > %157 = lshr i32 %6, 12 > %158 = and i32 %157, 510 > %159 = add i32 %156, %158 > %160 = add i32 %159, 17 > %161 = sext i32 %160 to i64 > %162 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %161 > %163 = bitcast i32 addrspace(3)* %162 to float addrspace(3)* > %164 = load float, float addrspace(3)* %163, align 4 > %165 = call float @llvm.fma.f32(float %62, float %164, float %126) > %166 = shl i32 %5, 2 > %167 = and i32 %166, 262140 > %168 = and i32 %6, 8191 > %169 = mul i32 %168, %9 > %170 = add i32 %167, %169 > %171 = lshr i32 %6, 12 > %172 = and i32 %171, 510 > %173 = add i32 %170, %172 > %174 = add i32 %173, 18 > %175 = sext i32 %174 to i64 > %176 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %175 > %177 = bitcast i32 addrspace(3)* %176 to float addrspace(3)* > %178 = load 
float, float addrspace(3)* %177, align 4 > %179 = call float @llvm.fma.f32(float %62, float %178, float %137) > %180 = fmul float %13, %25 > %181 = fmul float %14, %26 > %182 = fadd float %180, %181 > %183 = fmul float %15, %27 > %184 = fadd float %182, %183 > %185 = fadd float %184, %16 > %186 = fmul float %17, %25 > %187 = fmul float %18, %26 > %188 = fadd float %186, %187 > %189 = fmul float %19, %27 > %190 = fadd float %188, %189 > %191 = fadd float %190, %20 > %192 = fmul float %21, %25 > %193 = fmul float %22, %26 > %194 = fadd float %192, %193 > %195 = fmul float %23, %27 > %196 = fadd float %194, %195 > %197 = fadd float %196, %24 > %198 = fsub float %151, %185 > %199 = fsub float %165, %191 > %200 = fsub float %179, %197 > %201 = fmul float %198, %198 > %202 = fmul float %199, %199 > %203 = fadd float %202, %201 > %204 = fmul float %200, %200 > %205 = fadd float %203, %204 > %206 = call float @llvm.sqrt.f32(float %205) > %207 = fcmp une float %33, 0.000000e+00 > br i1 %207, label %IF, label %ELSE > >IF: ; preds = %main_body > %208 = fdiv float 1.000000e+00, %33 > %209 = fmul float %206, %208 > br label %ENDIF > >ELSE: ; preds = %main_body > %210 = fcmp ogt float %206, 0.000000e+00 > %211 = select i1 %210, float 1.000000e+00, float %206 > %212 = fcmp oge float %211, 0.000000e+00 > %.op = fmul float %211, 0x4600000000000000 > %213 = select i1 %212, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp20.0 = phi float [ %209, %IF ], [ %213, %ELSE ] > %214 = fsub float %206, %28 > %215 = fcmp une float %29, 0.000000e+00 > br i1 %215, label %IF33, label %ELSE34 > >IF33: ; preds = %ENDIF > %216 = fdiv float 1.000000e+00, %29 > %217 = fmul float %214, %216 > br label %ENDIF32 > >ELSE34: ; preds = %ENDIF > %218 = fcmp ogt float %214, 0.000000e+00 > %219 = select i1 %218, float 1.000000e+00, float %214 > %220 = fcmp oge float %219, 0.000000e+00 > %.op35 = fmul float %219, 0x4600000000000000 > %221 = select i1 %220, float %.op35, float 0xC600000000000000 > br label %ENDIF32 > >ENDIF32: ; preds = %ELSE34, %IF33 > %temp16.0 = phi float [ %217, %IF33 ], [ %221, %ELSE34 ] > %222 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %223 = fsub float 1.000000e+00, %222 > %224 = fadd float %temp20.0, %32 > %225 = lshr i32 %6, 13 > %226 = and i32 %225, 255 > %227 = shl i32 %5, 2 > %228 = and i32 %227, 262140 > %229 = and i32 %6, 8191 > %230 = mul i32 %229, %9 > %231 = add i32 %228, %230 > %232 = add i32 %231, %226 > %233 = add i32 %232, 20 > %234 = sext i32 %233 to i64 > %235 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %234 > %236 = bitcast i32 addrspace(3)* %235 to float addrspace(3)* > %237 = load float, float addrspace(3)* %236, align 4 > %238 = fmul float %237, %8 > %239 = lshr i32 %6, 13 > %240 = and i32 %239, 255 > %241 = shl i32 %5, 2 > %242 = and i32 %241, 262140 > %243 = and i32 %6, 8191 > %244 = mul i32 %243, %9 > %245 = add i32 %242, %244 > %246 = add i32 %245, %240 > %247 = add i32 %246, 21 > %248 = sext i32 %247 to i64 > %249 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %248 > %250 = bitcast i32 addrspace(3)* %249 to float addrspace(3)* > %251 = load float, float addrspace(3)* %250, align 4 > %252 = fmul float %251, %8 > %253 = shl i32 %5, 2 > %254 = and i32 %253, 262140 > %255 = and i32 %6, 8191 > %256 = mul i32 %255, %9 > %257 = add i32 %254, %256 > %258 = add i32 %257, 20 > %259 = sext i32 %258 to i64 > %260 = getelementptr [8320 x i32], 
[8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %259 > %261 = bitcast i32 addrspace(3)* %260 to float addrspace(3)* > %262 = load float, float addrspace(3)* %261, align 4 > %263 = call float @llvm.fma.f32(float %7, float %262, float %238) > %264 = shl i32 %5, 2 > %265 = and i32 %264, 262140 > %266 = and i32 %6, 8191 > %267 = mul i32 %266, %9 > %268 = add i32 %265, %267 > %269 = add i32 %268, 21 > %270 = sext i32 %269 to i64 > %271 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %270 > %272 = bitcast i32 addrspace(3)* %271 to float addrspace(3)* > %273 = load float, float addrspace(3)* %272, align 4 > %274 = call float @llvm.fma.f32(float %7, float %273, float %252) > %275 = shl i32 %5, 2 > %276 = and i32 %275, 262140 > %277 = and i32 %6, 8191 > %278 = mul i32 %277, %9 > %279 = add i32 %276, %278 > %280 = lshr i32 %6, 12 > %281 = and i32 %280, 510 > %282 = add i32 %279, %281 > %283 = add i32 %282, 20 > %284 = sext i32 %283 to i64 > %285 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %284 > %286 = bitcast i32 addrspace(3)* %285 to float addrspace(3)* > %287 = load float, float addrspace(3)* %286, align 4 > %288 = call float @llvm.fma.f32(float %62, float %287, float %263) > %289 = shl i32 %5, 2 > %290 = and i32 %289, 262140 > %291 = and i32 %6, 8191 > %292 = mul i32 %291, %9 > %293 = add i32 %290, %292 > %294 = lshr i32 %6, 12 > %295 = and i32 %294, 510 > %296 = add i32 %293, %295 > %297 = add i32 %296, 21 > %298 = sext i32 %297 to i64 > %299 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %298 > %300 = bitcast i32 addrspace(3)* %299 to float addrspace(3)* > %301 = load float, float addrspace(3)* %300, align 4 > %302 = call float @llvm.fma.f32(float %62, float %301, float %274) > %303 = bitcast float %288 to i32 > %304 = bitcast float %302 to i32 > %305 = bitcast float %224 to i32 > %306 = insertelement <4 x i32> undef, i32 %303, i32 0 > %307 = insertelement <4 x i32> %306, i32 %304, i32 1 > %308 = insertelement <4 x i32> %307, i32 %305, i32 2 > %309 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %308, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %310 = extractelement <4 x float> %309, i32 0 > %311 = extractelement <4 x float> %309, i32 3 > %312 = fadd float %310, %31 > %313 = fmul float %312, %30 > %314 = fmul float %223, %313 > %315 = shl i32 %5, 2 > %316 = and i32 %315, 262140 > %317 = and i32 %6, 8191 > %318 = mul i32 %317, %9 > %319 = add i32 %316, %318 > %320 = add i32 %319, 34 > %321 = sext i32 %320 to i64 > %322 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %321 > %323 = bitcast i32 addrspace(3)* %322 to float addrspace(3)* > %324 = load float, float addrspace(3)* %323, align 4 > %325 = fmul float %324, %7 > %326 = shl i32 %5, 2 > %327 = and i32 %326, 262140 > %328 = and i32 %6, 8191 > %329 = mul i32 %328, %9 > %330 = add i32 %327, %329 > %331 = add i32 %330, 28 > %332 = sext i32 %331 to i64 > %333 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %332 > %334 = bitcast i32 addrspace(3)* %333 to float addrspace(3)* > %335 = load float, float addrspace(3)* %334, align 4 > %336 = fmul float %335, %7 > %337 = shl i32 %5, 2 > %338 = and i32 %337, 262140 > %339 = and i32 %6, 8191 > %340 = mul i32 %339, %9 > %341 = add i32 %338, %340 > %342 = add i32 %341, 29 > %343 = sext i32 %342 to i64 > %344 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %343 > 
%345 = bitcast i32 addrspace(3)* %344 to float addrspace(3)* > %346 = load float, float addrspace(3)* %345, align 4 > %347 = fmul float %346, %7 > %348 = shl i32 %5, 2 > %349 = and i32 %348, 262140 > %350 = and i32 %6, 8191 > %351 = mul i32 %350, %9 > %352 = add i32 %349, %351 > %353 = add i32 %352, 30 > %354 = sext i32 %353 to i64 > %355 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %354 > %356 = bitcast i32 addrspace(3)* %355 to float addrspace(3)* > %357 = load float, float addrspace(3)* %356, align 4 > %358 = fmul float %357, %7 > %359 = lshr i32 %6, 13 > %360 = and i32 %359, 255 > %361 = shl i32 %5, 2 > %362 = and i32 %361, 262140 > %363 = and i32 %6, 8191 > %364 = mul i32 %363, %9 > %365 = add i32 %362, %364 > %366 = add i32 %365, %360 > %367 = add i32 %366, 28 > %368 = sext i32 %367 to i64 > %369 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %368 > %370 = bitcast i32 addrspace(3)* %369 to float addrspace(3)* > %371 = load float, float addrspace(3)* %370, align 4 > %372 = fmul float %371, %8 > %373 = lshr i32 %6, 13 > %374 = and i32 %373, 255 > %375 = shl i32 %5, 2 > %376 = and i32 %375, 262140 > %377 = and i32 %6, 8191 > %378 = mul i32 %377, %9 > %379 = add i32 %376, %378 > %380 = add i32 %379, %374 > %381 = add i32 %380, 29 > %382 = sext i32 %381 to i64 > %383 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %382 > %384 = bitcast i32 addrspace(3)* %383 to float addrspace(3)* > %385 = load float, float addrspace(3)* %384, align 4 > %386 = fmul float %385, %8 > %387 = lshr i32 %6, 13 > %388 = and i32 %387, 255 > %389 = shl i32 %5, 2 > %390 = and i32 %389, 262140 > %391 = and i32 %6, 8191 > %392 = mul i32 %391, %9 > %393 = add i32 %390, %392 > %394 = add i32 %393, %388 > %395 = add i32 %394, 30 > %396 = sext i32 %395 to i64 > %397 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %396 > %398 = bitcast i32 addrspace(3)* %397 to float addrspace(3)* > %399 = load float, float addrspace(3)* %398, align 4 > %400 = fmul float %399, %8 > %401 = lshr i32 %6, 13 > %402 = and i32 %401, 255 > %403 = shl i32 %5, 2 > %404 = and i32 %403, 262140 > %405 = and i32 %6, 8191 > %406 = mul i32 %405, %9 > %407 = add i32 %404, %406 > %408 = add i32 %407, %402 > %409 = add i32 %408, 32 > %410 = sext i32 %409 to i64 > %411 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %410 > %412 = bitcast i32 addrspace(3)* %411 to float addrspace(3)* > %413 = load float, float addrspace(3)* %412, align 4 > %414 = fmul float %413, %8 > %415 = lshr i32 %6, 13 > %416 = and i32 %415, 255 > %417 = shl i32 %5, 2 > %418 = and i32 %417, 262140 > %419 = and i32 %6, 8191 > %420 = mul i32 %419, %9 > %421 = add i32 %418, %420 > %422 = add i32 %421, %416 > %423 = add i32 %422, 33 > %424 = sext i32 %423 to i64 > %425 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %424 > %426 = bitcast i32 addrspace(3)* %425 to float addrspace(3)* > %427 = load float, float addrspace(3)* %426, align 4 > %428 = fmul float %427, %8 > %429 = lshr i32 %6, 13 > %430 = and i32 %429, 255 > %431 = shl i32 %5, 2 > %432 = and i32 %431, 262140 > %433 = and i32 %6, 8191 > %434 = mul i32 %433, %9 > %435 = add i32 %432, %434 > %436 = add i32 %435, %430 > %437 = add i32 %436, 34 > %438 = sext i32 %437 to i64 > %439 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %438 > %440 = bitcast i32 addrspace(3)* %439 to float addrspace(3)* > %441 = load float, 
float addrspace(3)* %440, align 4 > %442 = fmul float %441, %8 > %443 = shl i32 %5, 2 > %444 = and i32 %443, 262140 > %445 = and i32 %6, 8191 > %446 = mul i32 %445, %9 > %447 = add i32 %444, %446 > %448 = add i32 %447, 32 > %449 = sext i32 %448 to i64 > %450 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %449 > %451 = bitcast i32 addrspace(3)* %450 to float addrspace(3)* > %452 = load float, float addrspace(3)* %451, align 4 > %453 = call float @llvm.fma.f32(float %7, float %452, float %414) > %454 = shl i32 %5, 2 > %455 = and i32 %454, 262140 > %456 = and i32 %6, 8191 > %457 = mul i32 %456, %9 > %458 = add i32 %455, %457 > %459 = add i32 %458, 33 > %460 = sext i32 %459 to i64 > %461 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %460 > %462 = bitcast i32 addrspace(3)* %461 to float addrspace(3)* > %463 = load float, float addrspace(3)* %462, align 4 > %464 = call float @llvm.fma.f32(float %7, float %463, float %428) > %465 = shl i32 %5, 2 > %466 = and i32 %465, 262140 > %467 = and i32 %6, 8191 > %468 = mul i32 %467, %9 > %469 = add i32 %466, %468 > %470 = lshr i32 %6, 12 > %471 = and i32 %470, 510 > %472 = add i32 %469, %471 > %473 = add i32 %472, 32 > %474 = sext i32 %473 to i64 > %475 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %474 > %476 = bitcast i32 addrspace(3)* %475 to float addrspace(3)* > %477 = load float, float addrspace(3)* %476, align 4 > %478 = call float @llvm.fma.f32(float %62, float %477, float %453) > %479 = shl i32 %5, 2 > %480 = and i32 %479, 262140 > %481 = and i32 %6, 8191 > %482 = mul i32 %481, %9 > %483 = add i32 %480, %482 > %484 = lshr i32 %6, 12 > %485 = and i32 %484, 510 > %486 = add i32 %483, %485 > %487 = add i32 %486, 33 > %488 = sext i32 %487 to i64 > %489 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %488 > %490 = bitcast i32 addrspace(3)* %489 to float addrspace(3)* > %491 = load float, float addrspace(3)* %490, align 4 > %492 = call float @llvm.fma.f32(float %62, float %491, float %464) > %493 = fadd float %336, %372 > %494 = fadd float %347, %386 > %495 = fadd float %358, %400 > %496 = fadd float %325, %442 > %497 = shl i32 %5, 2 > %498 = and i32 %497, 262140 > %499 = and i32 %6, 8191 > %500 = mul i32 %499, %9 > %501 = add i32 %498, %500 > %502 = lshr i32 %6, 12 > %503 = and i32 %502, 510 > %504 = add i32 %501, %503 > %505 = add i32 %504, 34 > %506 = sext i32 %505 to i64 > %507 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %506 > %508 = bitcast i32 addrspace(3)* %507 to float addrspace(3)* > %509 = load float, float addrspace(3)* %508, align 4 > %510 = fmul float %62, %509 > %511 = shl i32 %5, 2 > %512 = and i32 %511, 262140 > %513 = and i32 %6, 8191 > %514 = mul i32 %513, %9 > %515 = add i32 %512, %514 > %516 = lshr i32 %6, 12 > %517 = and i32 %516, 510 > %518 = add i32 %515, %517 > %519 = add i32 %518, 28 > %520 = sext i32 %519 to i64 > %521 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %520 > %522 = bitcast i32 addrspace(3)* %521 to float addrspace(3)* > %523 = load float, float addrspace(3)* %522, align 4 > %524 = fmul float %62, %523 > %525 = shl i32 %5, 2 > %526 = and i32 %525, 262140 > %527 = and i32 %6, 8191 > %528 = mul i32 %527, %9 > %529 = add i32 %526, %528 > %530 = lshr i32 %6, 12 > %531 = and i32 %530, 510 > %532 = add i32 %529, %531 > %533 = add i32 %532, 29 > %534 = sext i32 %533 to i64 > %535 = getelementptr [8320 x i32], [8320 x i32] 
addrspace(3)* @tess_lds, i64 0, i64 %534 > %536 = bitcast i32 addrspace(3)* %535 to float addrspace(3)* > %537 = load float, float addrspace(3)* %536, align 4 > %538 = fmul float %62, %537 > %539 = shl i32 %5, 2 > %540 = and i32 %539, 262140 > %541 = and i32 %6, 8191 > %542 = mul i32 %541, %9 > %543 = add i32 %540, %542 > %544 = lshr i32 %6, 12 > %545 = and i32 %544, 510 > %546 = add i32 %543, %545 > %547 = add i32 %546, 30 > %548 = sext i32 %547 to i64 > %549 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %548 > %550 = bitcast i32 addrspace(3)* %549 to float addrspace(3)* > %551 = load float, float addrspace(3)* %550, align 4 > %552 = fmul float %62, %551 > %553 = fadd float %495, %552 > %554 = fadd float %496, %510 > %555 = fadd float %493, %524 > %556 = fadd float %494, %538 > %557 = lshr i32 %6, 13 > %558 = and i32 %557, 255 > %559 = shl i32 %5, 2 > %560 = and i32 %559, 262140 > %561 = and i32 %6, 8191 > %562 = mul i32 %561, %9 > %563 = add i32 %560, %562 > %564 = add i32 %563, %558 > %565 = add i32 %564, 36 > %566 = sext i32 %565 to i64 > %567 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %566 > %568 = bitcast i32 addrspace(3)* %567 to float addrspace(3)* > %569 = load float, float addrspace(3)* %568, align 4 > %570 = fmul float %569, %8 > %571 = lshr i32 %6, 13 > %572 = and i32 %571, 255 > %573 = shl i32 %5, 2 > %574 = and i32 %573, 262140 > %575 = and i32 %6, 8191 > %576 = mul i32 %575, %9 > %577 = add i32 %574, %576 > %578 = add i32 %577, %572 > %579 = add i32 %578, 37 > %580 = sext i32 %579 to i64 > %581 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %580 > %582 = bitcast i32 addrspace(3)* %581 to float addrspace(3)* > %583 = load float, float addrspace(3)* %582, align 4 > %584 = fmul float %583, %8 > %585 = lshr i32 %6, 13 > %586 = and i32 %585, 255 > %587 = shl i32 %5, 2 > %588 = and i32 %587, 262140 > %589 = and i32 %6, 8191 > %590 = mul i32 %589, %9 > %591 = add i32 %588, %590 > %592 = add i32 %591, %586 > %593 = add i32 %592, 38 > %594 = sext i32 %593 to i64 > %595 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %594 > %596 = bitcast i32 addrspace(3)* %595 to float addrspace(3)* > %597 = load float, float addrspace(3)* %596, align 4 > %598 = fmul float %597, %8 > %599 = shl i32 %5, 2 > %600 = and i32 %599, 262140 > %601 = and i32 %6, 8191 > %602 = mul i32 %601, %9 > %603 = add i32 %600, %602 > %604 = add i32 %603, 36 > %605 = sext i32 %604 to i64 > %606 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %605 > %607 = bitcast i32 addrspace(3)* %606 to float addrspace(3)* > %608 = load float, float addrspace(3)* %607, align 4 > %609 = call float @llvm.fma.f32(float %7, float %608, float %570) > %610 = shl i32 %5, 2 > %611 = and i32 %610, 262140 > %612 = and i32 %6, 8191 > %613 = mul i32 %612, %9 > %614 = add i32 %611, %613 > %615 = add i32 %614, 37 > %616 = sext i32 %615 to i64 > %617 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %616 > %618 = bitcast i32 addrspace(3)* %617 to float addrspace(3)* > %619 = load float, float addrspace(3)* %618, align 4 > %620 = call float @llvm.fma.f32(float %7, float %619, float %584) > %621 = shl i32 %5, 2 > %622 = and i32 %621, 262140 > %623 = and i32 %6, 8191 > %624 = mul i32 %623, %9 > %625 = add i32 %622, %624 > %626 = add i32 %625, 38 > %627 = sext i32 %626 to i64 > %628 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %627 
> %629 = bitcast i32 addrspace(3)* %628 to float addrspace(3)* > %630 = load float, float addrspace(3)* %629, align 4 > %631 = call float @llvm.fma.f32(float %7, float %630, float %598) > %632 = shl i32 %5, 2 > %633 = and i32 %632, 262140 > %634 = and i32 %6, 8191 > %635 = mul i32 %634, %9 > %636 = add i32 %633, %635 > %637 = lshr i32 %6, 12 > %638 = and i32 %637, 510 > %639 = add i32 %636, %638 > %640 = add i32 %639, 36 > %641 = sext i32 %640 to i64 > %642 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %641 > %643 = bitcast i32 addrspace(3)* %642 to float addrspace(3)* > %644 = load float, float addrspace(3)* %643, align 4 > %645 = call float @llvm.fma.f32(float %62, float %644, float %609) > %646 = shl i32 %5, 2 > %647 = and i32 %646, 262140 > %648 = and i32 %6, 8191 > %649 = mul i32 %648, %9 > %650 = add i32 %647, %649 > %651 = lshr i32 %6, 12 > %652 = and i32 %651, 510 > %653 = add i32 %650, %652 > %654 = add i32 %653, 37 > %655 = sext i32 %654 to i64 > %656 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %655 > %657 = bitcast i32 addrspace(3)* %656 to float addrspace(3)* > %658 = load float, float addrspace(3)* %657, align 4 > %659 = call float @llvm.fma.f32(float %62, float %658, float %620) > %660 = shl i32 %5, 2 > %661 = and i32 %660, 262140 > %662 = and i32 %6, 8191 > %663 = mul i32 %662, %9 > %664 = add i32 %661, %663 > %665 = lshr i32 %6, 12 > %666 = and i32 %665, 510 > %667 = add i32 %664, %666 > %668 = add i32 %667, 38 > %669 = sext i32 %668 to i64 > %670 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %669 > %671 = bitcast i32 addrspace(3)* %670 to float addrspace(3)* > %672 = load float, float addrspace(3)* %671, align 4 > %673 = call float @llvm.fma.f32(float %62, float %672, float %631) > %674 = fmul float %553, %553 > %675 = fmul float %554, %554 > %676 = fadd float %675, %674 > %677 = fmul float %673, %673 > %678 = fadd float %676, %677 > %679 = call float @llvm.AMDGPU.rsq.clamped.f32(float %678) > %680 = fmul float %679, %553 > %681 = fmul float %679, %554 > %682 = fmul float %679, %673 > %683 = call float @llvm.fma.f32(float %680, float %314, float %151) > %684 = call float @llvm.fma.f32(float %681, float %314, float %165) > %685 = call float @llvm.fma.f32(float %682, float %314, float %179) > %686 = fmul float %36, %683 > %687 = fmul float %37, %684 > %688 = fadd float %686, %687 > %689 = fmul float %38, %685 > %690 = fadd float %688, %689 > %691 = fadd float %690, %39 > %692 = fmul float %40, %683 > %693 = fmul float %41, %684 > %694 = fadd float %692, %693 > %695 = fmul float %42, %685 > %696 = fadd float %694, %695 > %697 = fadd float %696, %43 > %698 = fmul float %44, %683 > %699 = fmul float %45, %684 > %700 = fadd float %698, %699 > %701 = fmul float %46, %685 > %702 = fadd float %700, %701 > %703 = fadd float %702, %47 > %704 = fmul float %48, %683 > %705 = fmul float %49, %684 > %706 = fadd float %704, %705 > %707 = fmul float %50, %685 > %708 = fadd float %706, %707 > %709 = fadd float %708, %51 > %710 = lshr i32 %6, 13 > %711 = and i32 %710, 255 > %712 = shl i32 %5, 2 > %713 = and i32 %712, 262140 > %714 = and i32 %6, 8191 > %715 = mul i32 %714, %9 > %716 = add i32 %713, %715 > %717 = add i32 %716, %711 > %718 = add i32 %717, 24 > %719 = sext i32 %718 to i64 > %720 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %719 > %721 = bitcast i32 addrspace(3)* %720 to float addrspace(3)* > %722 = load float, float addrspace(3)* %721, align 4 > 
%723 = fmul float %722, %8 > %724 = lshr i32 %6, 13 > %725 = and i32 %724, 255 > %726 = shl i32 %5, 2 > %727 = and i32 %726, 262140 > %728 = and i32 %6, 8191 > %729 = mul i32 %728, %9 > %730 = add i32 %727, %729 > %731 = add i32 %730, %725 > %732 = add i32 %731, 25 > %733 = sext i32 %732 to i64 > %734 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %733 > %735 = bitcast i32 addrspace(3)* %734 to float addrspace(3)* > %736 = load float, float addrspace(3)* %735, align 4 > %737 = fmul float %736, %8 > %738 = lshr i32 %6, 13 > %739 = and i32 %738, 255 > %740 = shl i32 %5, 2 > %741 = and i32 %740, 262140 > %742 = and i32 %6, 8191 > %743 = mul i32 %742, %9 > %744 = add i32 %741, %743 > %745 = add i32 %744, %739 > %746 = add i32 %745, 26 > %747 = sext i32 %746 to i64 > %748 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %747 > %749 = bitcast i32 addrspace(3)* %748 to float addrspace(3)* > %750 = load float, float addrspace(3)* %749, align 4 > %751 = fmul float %750, %8 > %752 = lshr i32 %6, 13 > %753 = and i32 %752, 255 > %754 = shl i32 %5, 2 > %755 = and i32 %754, 262140 > %756 = and i32 %6, 8191 > %757 = mul i32 %756, %9 > %758 = add i32 %755, %757 > %759 = add i32 %758, %753 > %760 = add i32 %759, 27 > %761 = sext i32 %760 to i64 > %762 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %761 > %763 = bitcast i32 addrspace(3)* %762 to float addrspace(3)* > %764 = load float, float addrspace(3)* %763, align 4 > %765 = fmul float %764, %8 > %766 = shl i32 %5, 2 > %767 = and i32 %766, 262140 > %768 = and i32 %6, 8191 > %769 = mul i32 %768, %9 > %770 = add i32 %767, %769 > %771 = add i32 %770, 24 > %772 = sext i32 %771 to i64 > %773 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %772 > %774 = bitcast i32 addrspace(3)* %773 to float addrspace(3)* > %775 = load float, float addrspace(3)* %774, align 4 > %776 = call float @llvm.fma.f32(float %7, float %775, float %723) > %777 = shl i32 %5, 2 > %778 = and i32 %777, 262140 > %779 = and i32 %6, 8191 > %780 = mul i32 %779, %9 > %781 = add i32 %778, %780 > %782 = add i32 %781, 25 > %783 = sext i32 %782 to i64 > %784 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %783 > %785 = bitcast i32 addrspace(3)* %784 to float addrspace(3)* > %786 = load float, float addrspace(3)* %785, align 4 > %787 = call float @llvm.fma.f32(float %7, float %786, float %737) > %788 = shl i32 %5, 2 > %789 = and i32 %788, 262140 > %790 = and i32 %6, 8191 > %791 = mul i32 %790, %9 > %792 = add i32 %789, %791 > %793 = add i32 %792, 26 > %794 = sext i32 %793 to i64 > %795 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %794 > %796 = bitcast i32 addrspace(3)* %795 to float addrspace(3)* > %797 = load float, float addrspace(3)* %796, align 4 > %798 = call float @llvm.fma.f32(float %7, float %797, float %751) > %799 = shl i32 %5, 2 > %800 = and i32 %799, 262140 > %801 = and i32 %6, 8191 > %802 = mul i32 %801, %9 > %803 = add i32 %800, %802 > %804 = add i32 %803, 27 > %805 = sext i32 %804 to i64 > %806 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %805 > %807 = bitcast i32 addrspace(3)* %806 to float addrspace(3)* > %808 = load float, float addrspace(3)* %807, align 4 > %809 = call float @llvm.fma.f32(float %7, float %808, float %765) > %810 = shl i32 %5, 2 > %811 = and i32 %810, 262140 > %812 = and i32 %6, 8191 > %813 = mul i32 %812, %9 > %814 = add i32 %811, %813 > %815 = 
lshr i32 %6, 12 > %816 = and i32 %815, 510 > %817 = add i32 %814, %816 > %818 = add i32 %817, 24 > %819 = sext i32 %818 to i64 > %820 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %819 > %821 = bitcast i32 addrspace(3)* %820 to float addrspace(3)* > %822 = load float, float addrspace(3)* %821, align 4 > %823 = call float @llvm.fma.f32(float %62, float %822, float %776) > %824 = shl i32 %5, 2 > %825 = and i32 %824, 262140 > %826 = and i32 %6, 8191 > %827 = mul i32 %826, %9 > %828 = add i32 %825, %827 > %829 = lshr i32 %6, 12 > %830 = and i32 %829, 510 > %831 = add i32 %828, %830 > %832 = add i32 %831, 25 > %833 = sext i32 %832 to i64 > %834 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %833 > %835 = bitcast i32 addrspace(3)* %834 to float addrspace(3)* > %836 = load float, float addrspace(3)* %835, align 4 > %837 = call float @llvm.fma.f32(float %62, float %836, float %787) > %838 = shl i32 %5, 2 > %839 = and i32 %838, 262140 > %840 = and i32 %6, 8191 > %841 = mul i32 %840, %9 > %842 = add i32 %839, %841 > %843 = lshr i32 %6, 12 > %844 = and i32 %843, 510 > %845 = add i32 %842, %844 > %846 = add i32 %845, 26 > %847 = sext i32 %846 to i64 > %848 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %847 > %849 = bitcast i32 addrspace(3)* %848 to float addrspace(3)* > %850 = load float, float addrspace(3)* %849, align 4 > %851 = call float @llvm.fma.f32(float %62, float %850, float %798) > %852 = shl i32 %5, 2 > %853 = and i32 %852, 262140 > %854 = and i32 %6, 8191 > %855 = mul i32 %854, %9 > %856 = add i32 %853, %855 > %857 = lshr i32 %6, 12 > %858 = and i32 %857, 510 > %859 = add i32 %856, %858 > %860 = add i32 %859, 27 > %861 = sext i32 %860 to i64 > %862 = getelementptr [8320 x i32], [8320 x i32] addrspace(3)* @tess_lds, i64 0, i64 %861 > %863 = bitcast i32 addrspace(3)* %862 to float addrspace(3)* > %864 = load float, float addrspace(3)* %863, align 4 > %865 = call float @llvm.fma.f32(float %62, float %864, float %809) > %866 = bitcast i32 %10 to float > %867 = insertvalue <{ float, float, float }> undef, float %866, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %288, float %302, float undef, float %33) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %555, float %556, float %553, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %478, float %492, float %554, float %311) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %645, float %659, float %673, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %823, float %837, float %851, float %865) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %691, float %697, float %703, float %709) > ret <{ float, float, float }> %867 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) 
> >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >LLVM triggered Diagnostic Handler: LDS size exceeds device maximum >LLVM failed to compile shader >radeonsi: can't create a shader >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 352, 0, 0} >IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[2].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: FMA TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww > 17: MOV TEMP[0].w, CONST[1][22].wwww > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IN[4].xyzz > 21: MOV TEMP[2].w, TEMP[1].wwww > 22: MOV TEMP[1].xyz, CONST[1][22].xyzx > 23: MOV TEMP[1].w, IMM[2].xxxx > 24: MOV OUT[0], TEMP[0] > 25: MOV OUT[1], TEMP[2] > 26: MOV OUT[2], TEMP[1] > 27: END >radeonsi: Compiling shader 335 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 360) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 364) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, 
i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 7 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %61 = bitcast float %47 to i32 > %62 = bitcast float %48 to i32 > %63 = insertelement <2 x i32> undef, i32 %61, i32 0 > %64 = insertelement <2 x i32> %63, i32 %62, i32 1 > %65 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %64, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %66 = extractelement <4 x float> %65, i32 1 > %67 = extractelement <4 x float> %65, i32 3 > %68 = call float @llvm.fma.f32(float %66, float 2.000000e+00, float -1.000000e+00) > %69 = call float @llvm.fma.f32(float %67, float 2.000000e+00, float -1.000000e+00) > %70 = fsub float -0.000000e+00, %68 > %71 = call float @llvm.fma.f32(float %70, float %68, float 1.000000e+00) > %72 = fsub float -0.000000e+00, %69 > %73 = call float @llvm.fma.f32(float %72, float %69, float %71) > %74 = call float @llvm.sqrt.f32(float %73) > %75 = fmul float %49, %68 > %76 = fmul float %50, %69 > %77 = fadd float %76, %75 > %78 = fmul float %51, %74 > %79 = fadd float %77, %78 > %80 = fmul float %52, %68 > %81 = fmul float %53, %69 > %82 = fadd float %81, %80 > %83 = fmul float %54, %74 > %84 = fadd float %82, %83 > %85 = fmul float %55, %68 > %86 = fmul float %56, %69 > %87 = fadd float %86, %85 > %88 = fmul float %57, %74 > %89 = fadd float %87, %88 > %90 = fmul float %79, %79 > %91 = fmul float %84, %84 > %92 = fadd float %91, %90 > %93 = fmul float %89, %89 > %94 = fadd float %92, %93 > %95 = call float @llvm.AMDGPU.rsq.clamped.f32(float %94) > %96 = fmul float %95, %79 > %97 = fmul float %95, %84 > %98 = fmul float %95, %89 > %99 = call float @llvm.fma.f32(float %96, float 5.000000e-01, float 5.000000e-01) > %100 = call float @llvm.fma.f32(float %97, float 
5.000000e-01, float 5.000000e-01) > %101 = call float @llvm.fma.f32(float %98, float 5.000000e-01, float 5.000000e-01) > %102 = bitcast float %47 to i32 > %103 = bitcast float %48 to i32 > %104 = insertelement <2 x i32> undef, i32 %102, i32 0 > %105 = insertelement <2 x i32> %104, i32 %103, i32 1 > %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %107 = extractelement <4 x float> %106, i32 0 > %108 = extractelement <4 x float> %106, i32 1 > %109 = extractelement <4 x float> %106, i32 2 > %110 = extractelement <4 x float> %106, i32 3 > %111 = fmul float %107, %58 > %112 = fmul float %108, %59 > %113 = fmul float %109, %60 > %114 = bitcast float %5 to i32 > %115 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %114, 10 > %116 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %115, float %99, 11 > %117 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %116, float %100, 12 > %118 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %117, float %101, 13 > %119 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %118, float %28, 14 > %120 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %119, float %111, 15 > %121 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %120, float %112, 16 > %122 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %121, float %113, 17 > %123 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %122, float %110, 18 > %124 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %123, float %25, 19 > %125 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %124, float %26, 20 > %126 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %125, float %27, 21 > %127 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %126, float 0.000000e+00, 22 > %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> 
%127, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..1] >DCL CONST[2][0..26] >DCL TEMP[0..4], LOCAL >IMM[0] UINT32 {0, 1, 352, 384} >IMM[1] FLT32 { 1.0000, 0.9950, -1.0000, 0.0000} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, -0.3765} >IMM[3] UINT32 {368, 16, 400, 416} >IMM[4] INT32 {1, 0, 0, 0} >IMM[5] FLT32 { 2.0000, 0.5000, 0.0000, 0.0000} > 0: ADD TEMP[0].x, CONST[2][22].yyyy, IMM[1].xxxx > 1: ADD TEMP[1].xy, -IN[4].wwww, IMM[1].xyyy > 2: FMA TEMP[2].x, CONST[2][22].xxxx, TEMP[0].xxxx, TEMP[1].xxxx > 3: CEIL TEMP[3].x, TEMP[1].yyyy > 4: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 5: ADD TEMP[0].x, TEMP[2].xxxx, IMM[1].zzzz > 6: FSNE TEMP[2].x, CONST[2][22].yyyy, IMM[1].wwww > 7: UIF TEMP[2].xxxx :0 > 8: RCP TEMP[2].x, CONST[2][22].yyyy > 9: ELSE :0 > 10: MOV TEMP[2].x, IMM[2].xxxx > 11: ENDIF > 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[0].xxxx > 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 14: FMA TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 15: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx > 16: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 17: FMA TEMP[2].x, TEMP[3].xxxx, TEMP[0].xxxx, IMM[2].wwww > 18: FMA TEMP[0].x, -TEMP[3].xxxx, TEMP[0].xxxx, IMM[1].xxxx > 19: LG2 TEMP[3].x, TEMP[0].xxxx > 20: MUL TEMP[0].x, TEMP[3].xxxx, CONST[2][24].xxxx > 21: EX2 TEMP[3].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[3].xxxx, CONST[2][23].wwww > 23: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww > 24: AND TEMP[2].x, TEMP[2].xxxx, IMM[4].xxxx > 25: INEG TEMP[2].x, TEMP[2].xxxx > 26: USNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx > 27: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 28: KILL_IF -TEMP[1].xxxx > 29: FMA TEMP[1].xy, CONST[1][1].xxxx, CONST[2][25].xyyy, IN[0].xyyy > 30: MOV TEMP[1].xy, TEMP[1].xyyy > 31: TEX TEMP[1], TEMP[1], SAMP[0], 2D > 32: ADD TEMP[1], TEMP[1], IMM[1].zzzz > 33: FMA TEMP[2], IN[4].yyyy, TEMP[1], IMM[1].xxxx > 34: MUL TEMP[2].xyz, TEMP[2].wwww, TEMP[2].xyzz > 35: 
MUL TEMP[2].xyz, TEMP[2].xyzz, IN[4].xxxx > 36: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[2][24].yzww > 37: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[2][25].zzzz > 38: FMA TEMP[2].xyz, CONST[2][23].xyzz, TEMP[0].xxxx, TEMP[2].xyzz > 39: MOV TEMP[2].w, IMM[1].xxxx > 40: MOV TEMP[3].xy, IN[0].xyyy > 41: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 42: FMA TEMP[3].xy, TEMP[3].ywww, IMM[5].xxxx, IMM[1].zzzz > 43: MOV TEMP[0].xy, TEMP[3].xyxx > 44: FMA TEMP[4].x, -TEMP[3].xxxx, TEMP[3].xxxx, IMM[1].xxxx > 45: FMA TEMP[3].x, -TEMP[3].yyyy, TEMP[3].yyyy, TEMP[4].xxxx > 46: SQRT TEMP[3].x, TEMP[3].xxxx > 47: MOV TEMP[0].z, TEMP[3].xxxx > 48: DP3 TEMP[1].x, IN[1].xyzz, TEMP[0].xyzz > 49: DP3 TEMP[3].x, IN[2].xyzz, TEMP[0].xyzz > 50: MOV TEMP[1].y, TEMP[3].xxxx > 51: DP3 TEMP[3].x, IN[3].xyzz, TEMP[0].xyzz > 52: MOV TEMP[1].z, TEMP[3].xxxx > 53: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 54: RSQ TEMP[3].x, TEMP[0].xxxx > 55: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[1].xyzz > 56: FMA TEMP[1].xyz, TEMP[0].xyzz, IMM[5].yyyy, IMM[5].yyyy > 57: MOV TEMP[1].w, CONST[2][26].xxxx > 58: MOV TEMP[0].xy, IN[0].xyyy > 59: TEX TEMP[0], TEMP[0], SAMP[2], 2D > 60: MOV TEMP[3].w, IMM[1].wwww > 61: MOV TEMP[4].xy, IN[0].xyyy > 62: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D > 63: MUL TEMP[3].x, TEMP[4].zzzz, CONST[2][25].wwww > 64: MOV TEMP[3].yz, TEMP[4].xyxx > 65: MOV OUT[0], TEMP[2] > 66: MOV OUT[1], TEMP[1] > 67: MOV OUT[2], TEMP[0] > 68: MOV OUT[3], TEMP[3] > 69: END >radeonsi: Compiling shader 336 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %26 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 > %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 352) > %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 356) > %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 392) > %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 396) > %38 = call float @llvm.SI.load.const(<16 x i8> %27, i32 400) > %39 = call float @llvm.SI.load.const(<16 x i8> %27, i32 404) > %40 = call float @llvm.SI.load.const(<16 x i8> %27, i32 408) > %41 = call float @llvm.SI.load.const(<16 x i8> %27, i32 412) > %42 = call float @llvm.SI.load.const(<16 x i8> %27, i32 416) > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, 
i64 0 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 7 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 15 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %93 = fadd float %29, 1.000000e+00 > %94 = fsub float 1.000000e+00, %92 > %95 = fsub float 0x3FEFD70A40000000, %92 > %96 = call float @llvm.fma.f32(float %28, float %93, float %94) > %97 = call float @llvm.ceil.f32(float %95) > %98 = call float @llvm.AMDGPU.clamp.(float %97, float 0.000000e+00, float 1.000000e+00) > %99 = fadd 
float %96, -1.000000e+00 > %100 = fcmp une float %29, 0.000000e+00 > %101 = fdiv float 1.000000e+00, %29 > %temp8.0 = select i1 %100, float %101, float 0x4600000000000000 > %102 = fmul float %temp8.0, %99 > %103 = call float @llvm.AMDGPU.clamp.(float %102, float 0.000000e+00, float 1.000000e+00) > %104 = call float @llvm.fma.f32(float %103, float -2.000000e+00, float 3.000000e+00) > %105 = fmul float %103, %103 > %106 = fmul float %105, %104 > %107 = call float @llvm.fma.f32(float %98, float %106, float 0xBFD8181820000000) > %108 = fsub float -0.000000e+00, %98 > %109 = call float @llvm.fma.f32(float %108, float %106, float 1.000000e+00) > %110 = call float @llvm.log2.f32(float %109) > %111 = fmul float %110, %34 > %112 = call float @llvm.exp2.f32(float %111) > %113 = fmul float %112, %33 > %114 = fcmp olt float %107, 0.000000e+00 > %115 = select i1 %114, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %115) > %116 = call float @llvm.fma.f32(float %25, float %38, float %79) > %117 = call float @llvm.fma.f32(float %25, float %39, float %80) > %118 = bitcast float %116 to i32 > %119 = bitcast float %117 to i32 > %120 = insertelement <2 x i32> undef, i32 %118, i32 0 > %121 = insertelement <2 x i32> %120, i32 %119, i32 1 > %122 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %121, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %123 = extractelement <4 x float> %122, i32 0 > %124 = extractelement <4 x float> %122, i32 1 > %125 = extractelement <4 x float> %122, i32 2 > %126 = extractelement <4 x float> %122, i32 3 > %127 = fadd float %123, -1.000000e+00 > %128 = fadd float %124, -1.000000e+00 > %129 = fadd float %125, -1.000000e+00 > %130 = fadd float %126, -1.000000e+00 > %131 = call float @llvm.fma.f32(float %91, float %127, float 1.000000e+00) > %132 = call float @llvm.fma.f32(float %91, float %128, float 1.000000e+00) > %133 = call float @llvm.fma.f32(float %91, float %129, float 1.000000e+00) > %134 = call float @llvm.fma.f32(float %91, float %130, float 1.000000e+00) > %135 = fmul float %134, %131 > %136 = fmul float %134, %132 > %137 = fmul float %134, %133 > %138 = fmul float %135, %90 > %139 = fmul float %136, %90 > %140 = fmul float %137, %90 > %141 = fmul float %138, %35 > %142 = fmul float %139, %36 > %143 = fmul float %140, %37 > %144 = fmul float %141, %40 > %145 = fmul float %142, %40 > %146 = fmul float %143, %40 > %147 = call float @llvm.fma.f32(float %30, float %113, float %144) > %148 = call float @llvm.fma.f32(float %31, float %113, float %145) > %149 = call float @llvm.fma.f32(float %32, float %113, float %146) > %150 = bitcast float %79 to i32 > %151 = bitcast float %80 to i32 > %152 = insertelement <2 x i32> undef, i32 %150, i32 0 > %153 = insertelement <2 x i32> %152, i32 %151, i32 1 > %154 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %153, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %155 = extractelement <4 x float> %154, i32 1 > %156 = extractelement <4 x float> %154, i32 3 > %157 = call float @llvm.fma.f32(float %155, float 2.000000e+00, float -1.000000e+00) > %158 = call float @llvm.fma.f32(float %156, float 2.000000e+00, float -1.000000e+00) > %159 = fsub float -0.000000e+00, %157 > %160 = call float @llvm.fma.f32(float %159, float %157, float 1.000000e+00) > %161 = fsub float -0.000000e+00, %158 > %162 = call float @llvm.fma.f32(float %161, float %158, float %160) > %163 = call float @llvm.sqrt.f32(float %162) > %164 = fmul float 
%81, %157 > %165 = fmul float %82, %158 > %166 = fadd float %165, %164 > %167 = fmul float %83, %163 > %168 = fadd float %166, %167 > %169 = fmul float %84, %157 > %170 = fmul float %85, %158 > %171 = fadd float %170, %169 > %172 = fmul float %86, %163 > %173 = fadd float %171, %172 > %174 = fmul float %87, %157 > %175 = fmul float %88, %158 > %176 = fadd float %175, %174 > %177 = fmul float %89, %163 > %178 = fadd float %176, %177 > %179 = fmul float %168, %168 > %180 = fmul float %173, %173 > %181 = fadd float %180, %179 > %182 = fmul float %178, %178 > %183 = fadd float %181, %182 > %184 = call float @llvm.AMDGPU.rsq.clamped.f32(float %183) > %185 = fmul float %184, %168 > %186 = fmul float %184, %173 > %187 = fmul float %184, %178 > %188 = call float @llvm.fma.f32(float %185, float 5.000000e-01, float 5.000000e-01) > %189 = call float @llvm.fma.f32(float %186, float 5.000000e-01, float 5.000000e-01) > %190 = call float @llvm.fma.f32(float %187, float 5.000000e-01, float 5.000000e-01) > %191 = bitcast float %79 to i32 > %192 = bitcast float %80 to i32 > %193 = insertelement <2 x i32> undef, i32 %191, i32 0 > %194 = insertelement <2 x i32> %193, i32 %192, i32 1 > %195 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %194, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %196 = extractelement <4 x float> %195, i32 0 > %197 = extractelement <4 x float> %195, i32 1 > %198 = extractelement <4 x float> %195, i32 2 > %199 = extractelement <4 x float> %195, i32 3 > %200 = bitcast float %79 to i32 > %201 = bitcast float %80 to i32 > %202 = insertelement <2 x i32> undef, i32 %200, i32 0 > %203 = insertelement <2 x i32> %202, i32 %201, i32 1 > %204 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %203, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %205 = extractelement <4 x float> %204, i32 0 > %206 = extractelement <4 x float> %204, i32 1 > %207 = extractelement <4 x float> %204, i32 2 > %208 = fmul float %207, %41 > %209 = bitcast float %5 to i32 > %210 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %209, 10 > %211 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %210, float %147, 11 > %212 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %211, float %148, 12 > %213 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %212, float %149, 13 > %214 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %213, float 1.000000e+00, 14 > %215 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %214, float %188, 15 > %216 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float }> %215, float %189, 16 > %217 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %216, float %190, 17 > %218 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %217, float %42, 18 > %219 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %218, float %196, 19 > %220 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %219, float %197, 20 > %221 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %220, float %198, 21 > %222 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %221, float %199, 22 > %223 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %222, float %208, 23 > %224 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %223, float %206, 24 > %225 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %224, float %205, 25 > %226 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %225, float 0.000000e+00, 26 > %227 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %226, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %227 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: 
nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..45] >DCL CONST[2][0..4095] >DCL TEMP[0..22], LOCAL >DCL ADDR[0] >IMM[0] UINT32 {0, 624, 720, 608} >IMM[1] UINT32 {640, 1, 16, 400} >IMM[2] FLT32 { 255.0020, 2.0000, 1.0000, 0.5000} >IMM[3] INT32 {1, 2, 4, 0} >IMM[4] UINT32 {320, 496, 512, 528} >IMM[5] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1667, 0.2500} >IMM[6] UINT32 {480, 688, 576, 304} >IMM[7] UINT32 {544, 560, 592, 704} >IMM[8] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, CONST[1][39].xyzz, CONST[1][45].yyyy > 1: FMA TEMP[0].xyz, CONST[1][45].xxxx, CONST[1][38].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, CONST[1][45].zzzz, CONST[1][40].xyzz, TEMP[0].xyzz > 3: MUL TEMP[2].xyz, IN[5].zyxx, IMM[2].xxxx > 4: F2I TEMP[3].xyz, TEMP[2].xyzz > 5: SHL TEMP[4].xyz, TEMP[3].xyzz, IMM[3].xxxx > 6: UMAD TEMP[5].xyz, TEMP[3].xyzz, IMM[3].yyyy, IMM[3].xxxx > 7: UMUL TEMP[6].x, TEMP[4].xxxx, IMM[1].zzzz > 8: USHR TEMP[7].x, TEMP[6].xxxx, IMM[3].zzzz > 9: UARL ADDR[0].x, TEMP[7].xxxx > 10: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 11: MUL TEMP[6].x, IN[4].xxxx, TEMP[6].yyyy > 12: MOV TEMP[6].w, TEMP[6].xxxx > 13: UMUL TEMP[7].x, TEMP[4].yyyy, IMM[1].zzzz > 14: USHR TEMP[8].x, TEMP[7].xxxx, IMM[3].zzzz > 15: UARL ADDR[0].x, TEMP[8].xxxx > 16: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 17: MUL TEMP[7].x, IN[4].yyyy, TEMP[7].yyyy > 18: MOV TEMP[7].w, TEMP[7].xxxx > 19: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz > 20: USHR TEMP[9].x, TEMP[8].xxxx, IMM[3].zzzz > 21: UARL ADDR[0].x, TEMP[9].xxxx > 22: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 23: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 24: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 25: UARL ADDR[0].x, TEMP[10].xxxx > 26: MOV TEMP[9].w, CONST[2][ADDR[0].x] > 27: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].wwww > 28: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 29: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 30: UARL ADDR[0].x, TEMP[10].xxxx > 31: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 32: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 33: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 34: UARL ADDR[0].x, TEMP[11].xxxx > 35: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 36: FMA TEMP[9].x, TEMP[9].yyyy, TEMP[10].zzzz, -TEMP[8].xxxx > 37: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 38: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 39: UARL ADDR[0].x, TEMP[11].xxxx > 40: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 41: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 42: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 43: UARL ADDR[0].x, TEMP[12].xxxx > 44: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 45: FMA TEMP[8].x, TEMP[10].yyyy, TEMP[11].zzzz, TEMP[8].xxxx > 46: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].xxxx > 47: MUL TEMP[9].x, TEMP[9].xxxx, IN[4].xxxx > 48: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].yyyy > 49: MOV TEMP[6].z, TEMP[9].xxxx > 50: UMUL TEMP[9].x, TEMP[5].yyyy, IMM[1].zzzz > 51: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 52: UARL ADDR[0].x, TEMP[10].xxxx > 53: MOV TEMP[9].x, 
CONST[2][ADDR[0].x] > 54: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 55: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 56: UARL ADDR[0].x, TEMP[11].xxxx > 57: MOV TEMP[10].w, CONST[2][ADDR[0].x] > 58: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].wwww > 59: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 60: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 61: UARL ADDR[0].x, TEMP[11].xxxx > 62: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 63: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 64: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 65: UARL ADDR[0].x, TEMP[12].xxxx > 66: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 67: FMA TEMP[10].x, TEMP[10].yyyy, TEMP[11].zzzz, -TEMP[9].xxxx > 68: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 69: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 70: UARL ADDR[0].x, TEMP[12].xxxx > 71: MOV TEMP[11].y, CONST[2][ADDR[0].x] > 72: UMUL TEMP[12].x, TEMP[5].yyyy, IMM[1].zzzz > 73: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 74: UARL ADDR[0].x, TEMP[13].xxxx > 75: MOV TEMP[12].z, CONST[2][ADDR[0].x] > 76: FMA TEMP[9].x, TEMP[11].yyyy, TEMP[12].zzzz, TEMP[9].xxxx > 77: MUL TEMP[9].x, TEMP[9].xxxx, IN[4].yyyy > 78: MUL TEMP[9].x, IMM[2].yyyy, TEMP[9].xxxx > 79: MOV TEMP[9].y, TEMP[9].xxxx > 80: MUL TEMP[10].x, TEMP[10].xxxx, IN[4].yyyy > 81: MUL TEMP[10].x, IMM[2].yyyy, TEMP[10].xxxx > 82: MOV TEMP[7].z, TEMP[10].xxxx > 83: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 84: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 85: UARL ADDR[0].x, TEMP[11].xxxx > 86: MOV TEMP[10].yz, CONST[2][ADDR[0].x] > 87: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 88: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 89: UARL ADDR[0].x, TEMP[12].xxxx > 90: MOV TEMP[11].xw, CONST[2][ADDR[0].x] > 91: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww > 92: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 93: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 94: UARL ADDR[0].x, TEMP[12].xxxx > 95: MOV TEMP[11].x, CONST[2][ADDR[0].x] > 96: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz > 97: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 98: UARL ADDR[0].x, TEMP[13].xxxx > 99: MOV TEMP[12].y, CONST[2][ADDR[0].x] >100: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >101: MUL TEMP[11].x, TEMP[11].xxxx, IN[4].xxxx >102: MUL TEMP[6].x, IMM[2].yyyy, TEMP[11].xxxx >103: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz >104: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz >105: UARL ADDR[0].x, TEMP[12].xxxx >106: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >107: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >108: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz >109: UARL ADDR[0].x, TEMP[13].xxxx >110: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >111: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >112: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >113: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[2].yyyy, IMM[2].zzzz >114: MUL TEMP[13].x, IN[4].xxxx, TEMP[12].yyyy >115: MOV TEMP[6].y, TEMP[13].xxxx >116: UMUL TEMP[13].x, TEMP[5].yyyy, IMM[1].zzzz >117: USHR TEMP[14].x, TEMP[13].xxxx, IMM[3].zzzz >118: UARL ADDR[0].x, TEMP[14].xxxx >119: MOV TEMP[13].yz, CONST[2][ADDR[0].x] >120: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >121: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >122: UARL ADDR[0].x, TEMP[15].xxxx >123: MOV TEMP[14].xw, CONST[2][ADDR[0].x] >124: MUL TEMP[13].xyz, TEMP[13].zzyy, TEMP[14].wxww >125: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >126: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >127: UARL ADDR[0].x, TEMP[15].xxxx >128: MOV TEMP[14].x, CONST[2][ADDR[0].x] >129: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >130: USHR TEMP[16].x, TEMP[15].xxxx, 
IMM[3].zzzz >131: UARL ADDR[0].x, TEMP[16].xxxx >132: MOV TEMP[15].y, CONST[2][ADDR[0].x] >133: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].yyyy, TEMP[13].xxxx >134: MUL TEMP[14].x, TEMP[14].xxxx, IN[4].yyyy >135: MUL TEMP[7].x, IMM[2].yyyy, TEMP[14].xxxx >136: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >137: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >138: UARL ADDR[0].x, TEMP[15].xxxx >139: MOV TEMP[14].xyz, CONST[2][ADDR[0].x] >140: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >141: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >142: UARL ADDR[0].x, TEMP[16].xxxx >143: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >144: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >145: ADD TEMP[14].xyz, TEMP[14].zzyy, TEMP[14].yxxx >146: FMA TEMP[15].xyz, -TEMP[14].xyzz, IMM[2].yyyy, IMM[2].zzzz >147: MUL TEMP[16].x, IN[4].yyyy, TEMP[15].yyyy >148: MOV TEMP[7].y, TEMP[16].xxxx >149: ADD TEMP[6], TEMP[6], TEMP[7] >150: UMUL TEMP[16].x, TEMP[4].zzzz, IMM[1].zzzz >151: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >152: UARL ADDR[0].x, TEMP[17].xxxx >153: MOV TEMP[16].y, CONST[2][ADDR[0].x] >154: MUL TEMP[16].x, IN[4].zzzz, TEMP[16].yyyy >155: MOV TEMP[7].w, TEMP[16].xxxx >156: UMUL TEMP[16].x, TEMP[5].zzzz, IMM[1].zzzz >157: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >158: UARL ADDR[0].x, TEMP[17].xxxx >159: MOV TEMP[16].x, CONST[2][ADDR[0].x] >160: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >161: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >162: UARL ADDR[0].x, TEMP[18].xxxx >163: MOV TEMP[17].w, CONST[2][ADDR[0].x] >164: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[17].wwww >165: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >166: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >167: UARL ADDR[0].x, TEMP[18].xxxx >168: MOV TEMP[17].y, CONST[2][ADDR[0].x] >169: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >170: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >171: UARL ADDR[0].x, TEMP[19].xxxx >172: MOV TEMP[18].z, CONST[2][ADDR[0].x] >173: FMA TEMP[17].x, TEMP[17].yyyy, TEMP[18].zzzz, -TEMP[16].xxxx >174: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >175: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >176: UARL ADDR[0].x, TEMP[19].xxxx >177: MOV TEMP[18].y, CONST[2][ADDR[0].x] >178: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >179: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >180: UARL ADDR[0].x, TEMP[20].xxxx >181: MOV TEMP[19].z, CONST[2][ADDR[0].x] >182: FMA TEMP[16].x, TEMP[18].yyyy, TEMP[19].zzzz, TEMP[16].xxxx >183: MUL TEMP[16].x, TEMP[16].xxxx, IN[4].zzzz >184: MUL TEMP[16].x, IMM[2].yyyy, TEMP[16].xxxx >185: MOV TEMP[16].y, TEMP[16].xxxx >186: MUL TEMP[17].x, TEMP[17].xxxx, IN[4].zzzz >187: MUL TEMP[17].x, IMM[2].yyyy, TEMP[17].xxxx >188: MOV TEMP[7].z, TEMP[17].xxxx >189: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >190: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >191: UARL ADDR[0].x, TEMP[18].xxxx >192: MOV TEMP[17].yz, CONST[2][ADDR[0].x] >193: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >194: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >195: UARL ADDR[0].x, TEMP[19].xxxx >196: MOV TEMP[18].xw, CONST[2][ADDR[0].x] >197: MUL TEMP[17].xyz, TEMP[17].zzyy, TEMP[18].wxww >198: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >199: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >200: UARL ADDR[0].x, TEMP[19].xxxx >201: MOV TEMP[18].x, CONST[2][ADDR[0].x] >202: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >203: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >204: UARL ADDR[0].x, TEMP[20].xxxx >205: MOV TEMP[19].y, CONST[2][ADDR[0].x] >206: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].yyyy, TEMP[17].xxxx >207: MUL TEMP[18].x, TEMP[18].xxxx, IN[4].zzzz >208: 
MUL TEMP[7].x, IMM[2].yyyy, TEMP[18].xxxx >209: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >210: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >211: UARL ADDR[0].x, TEMP[19].xxxx >212: MOV TEMP[18].xyz, CONST[2][ADDR[0].x] >213: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >214: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >215: UARL ADDR[0].x, TEMP[20].xxxx >216: MOV TEMP[19].xyz, CONST[2][ADDR[0].x] >217: MUL TEMP[18].xyz, TEMP[18].xyzz, TEMP[19].xyzz >218: ADD TEMP[18].xyz, TEMP[18].zzyy, TEMP[18].yxxx >219: FMA TEMP[19].xyz, -TEMP[18].xyzz, IMM[2].yyyy, IMM[2].zzzz >220: MUL TEMP[20].x, IN[4].zzzz, TEMP[19].yyyy >221: MOV TEMP[7].y, TEMP[20].xxxx >222: ADD TEMP[6], TEMP[6], TEMP[7] >223: DP3 TEMP[20].x, TEMP[6].xyzz, IN[1].xyzz >224: MOV TEMP[7].y, TEMP[20].xxxx >225: UMUL TEMP[20].x, TEMP[5].xxxx, IMM[1].zzzz >226: USHR TEMP[21].x, TEMP[20].xxxx, IMM[3].zzzz >227: UARL ADDR[0].x, TEMP[21].xxxx >228: MOV TEMP[20].x, CONST[2][ADDR[0].x] >229: UMUL TEMP[21].x, TEMP[5].xxxx, IMM[1].zzzz >230: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >231: UARL ADDR[0].x, TEMP[22].xxxx >232: MOV TEMP[21].z, CONST[2][ADDR[0].x] >233: FMA TEMP[20].x, TEMP[20].xxxx, TEMP[21].zzzz, -TEMP[10].zzzz >234: MUL TEMP[20].x, TEMP[20].xxxx, IN[4].xxxx >235: MUL TEMP[20].x, IMM[2].yyyy, TEMP[20].xxxx >236: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].yyyy >237: MOV TEMP[20].y, TEMP[8].xxxx >238: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >239: USHR TEMP[21].x, TEMP[8].xxxx, IMM[3].zzzz >240: UARL ADDR[0].x, TEMP[21].xxxx >241: MOV TEMP[8].x, CONST[2][ADDR[0].x] >242: UMUL TEMP[21].x, TEMP[5].yyyy, IMM[1].zzzz >243: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >244: UARL ADDR[0].x, TEMP[22].xxxx >245: MOV TEMP[21].z, CONST[2][ADDR[0].x] >246: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[21].zzzz, -TEMP[13].zzzz >247: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].yyyy >248: MUL TEMP[9].x, IMM[2].yyyy, TEMP[8].xxxx >249: MUL TEMP[8].x, IN[4].xxxx, TEMP[12].zzzz >250: MOV TEMP[20].z, TEMP[8].xxxx >251: MUL TEMP[11].x, IN[4].xxxx, TEMP[12].xxxx >252: MUL TEMP[8].x, IN[4].yyyy, TEMP[15].zzzz >253: MOV TEMP[9].z, TEMP[8].xxxx >254: MUL TEMP[14].x, IN[4].yyyy, TEMP[15].xxxx >255: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >256: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >257: UARL ADDR[0].x, TEMP[12].xxxx >258: MOV TEMP[8].z, CONST[2][ADDR[0].x] >259: MUL TEMP[8].x, IN[4].xxxx, TEMP[8].zzzz >260: MOV TEMP[20].w, TEMP[8].xxxx >261: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >262: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >263: UARL ADDR[0].x, TEMP[12].xxxx >264: MOV TEMP[8].z, CONST[2][ADDR[0].x] >265: MUL TEMP[8].x, IN[4].yyyy, TEMP[8].zzzz >266: MOV TEMP[9].w, TEMP[8].xxxx >267: ADD TEMP[9], TEMP[9], TEMP[20] >268: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >269: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >270: UARL ADDR[0].x, TEMP[12].xxxx >271: MOV TEMP[8].x, CONST[2][ADDR[0].x] >272: UMUL TEMP[12].x, TEMP[5].zzzz, IMM[1].zzzz >273: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >274: UARL ADDR[0].x, TEMP[15].xxxx >275: MOV TEMP[12].z, CONST[2][ADDR[0].x] >276: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].zzzz, -TEMP[17].zzzz >277: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].zzzz >278: MUL TEMP[16].x, IMM[2].yyyy, TEMP[8].xxxx >279: MUL TEMP[8].x, IN[4].zzzz, TEMP[19].zzzz >280: MOV TEMP[16].z, TEMP[8].xxxx >281: MUL TEMP[18].x, IN[4].zzzz, TEMP[19].xxxx >282: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >283: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >284: UARL ADDR[0].x, TEMP[12].xxxx >285: MOV TEMP[8].z, CONST[2][ADDR[0].x] >286: MUL TEMP[8].x, IN[4].zzzz, TEMP[8].zzzz >287: 
MOV TEMP[16].w, TEMP[8].xxxx >288: ADD TEMP[9], TEMP[9], TEMP[16] >289: DP3 TEMP[8].x, TEMP[9].xyzz, IN[1].xyzz >290: MOV TEMP[7].z, TEMP[8].xxxx >291: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >292: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >293: UARL ADDR[0].x, TEMP[12].xxxx >294: MOV TEMP[8].x, CONST[2][ADDR[0].x] >295: MUL TEMP[8].x, IN[4].xxxx, TEMP[8].xxxx >296: MOV TEMP[11].w, TEMP[8].xxxx >297: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >298: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >299: UARL ADDR[0].x, TEMP[12].xxxx >300: MOV TEMP[8].x, CONST[2][ADDR[0].x] >301: MUL TEMP[8].x, IN[4].yyyy, TEMP[8].xxxx >302: MOV TEMP[14].w, TEMP[8].xxxx >303: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >304: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >305: UARL ADDR[0].x, TEMP[12].xxxx >306: MOV TEMP[8].x, CONST[2][ADDR[0].x] >307: MUL TEMP[8].x, IN[4].zzzz, TEMP[8].xxxx >308: MOV TEMP[18].w, TEMP[8].xxxx >309: ADD TEMP[2].x, TEMP[10].zzzz, TEMP[10].yyyy >310: MUL TEMP[2].x, TEMP[2].xxxx, IN[4].xxxx >311: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >312: MOV TEMP[11].z, TEMP[8].xxxx >313: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz >314: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >315: UARL ADDR[0].x, TEMP[12].xxxx >316: MOV TEMP[8].x, CONST[2][ADDR[0].x] >317: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >318: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >319: UARL ADDR[0].x, TEMP[15].xxxx >320: MOV TEMP[12].y, CONST[2][ADDR[0].x] >321: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].yyyy, -TEMP[10].xxxx >322: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].xxxx >323: MUL TEMP[8].x, IMM[2].yyyy, TEMP[8].xxxx >324: MOV TEMP[11].y, TEMP[8].xxxx >325: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >326: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >327: UARL ADDR[0].x, TEMP[10].xxxx >328: MOV TEMP[8].x, CONST[2][ADDR[0].x] >329: UMUL TEMP[10].x, TEMP[5].zzzz, IMM[1].zzzz >330: USHR TEMP[12].x, TEMP[10].xxxx, IMM[3].zzzz >331: UARL ADDR[0].x, TEMP[12].xxxx >332: MOV TEMP[10].y, CONST[2][ADDR[0].x] >333: FMA TEMP[2].x, TEMP[8].xxxx, TEMP[10].yyyy, -TEMP[17].xxxx >334: ADD TEMP[8].x, TEMP[17].zzzz, TEMP[17].yyyy >335: MOV TEMP[2].y, TEMP[8].xxxx >336: MUL TEMP[2].xy, TEMP[2].xyyy, IN[4].zzzz >337: MUL TEMP[8].xy, IMM[2].yyyy, TEMP[2].xyyy >338: MOV TEMP[18].yz, TEMP[8].yxyy >339: ADD TEMP[2].x, TEMP[13].zzzz, TEMP[13].yyyy >340: MUL TEMP[2].x, TEMP[2].xxxx, IN[4].yyyy >341: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >342: MOV TEMP[14].z, TEMP[8].xxxx >343: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >344: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >345: UARL ADDR[0].x, TEMP[10].xxxx >346: MOV TEMP[8].x, CONST[2][ADDR[0].x] >347: UMUL TEMP[5].x, TEMP[5].yyyy, IMM[1].zzzz >348: USHR TEMP[10].x, TEMP[5].xxxx, IMM[3].zzzz >349: UARL ADDR[0].x, TEMP[10].xxxx >350: MOV TEMP[5].y, CONST[2][ADDR[0].x] >351: FMA TEMP[5].x, TEMP[8].xxxx, TEMP[5].yyyy, -TEMP[13].xxxx >352: MUL TEMP[5].x, TEMP[5].xxxx, IN[4].yyyy >353: MUL TEMP[5].x, IMM[2].yyyy, TEMP[5].xxxx >354: MOV TEMP[14].y, TEMP[5].xxxx >355: ADD TEMP[2], TEMP[11], TEMP[14] >356: ADD TEMP[2], TEMP[18], TEMP[2] >357: DP3 TEMP[7].x, TEMP[2].xyzz, IN[1].xyzz >358: MOV TEMP[5].xyz, TEMP[7].xyzx >359: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[7].xyzz >360: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz >361: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx >362: MUL TEMP[7].x, TEMP[7].xxxx, CONST[1][25].yyyy >363: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].wwww >364: MOV TEMP[3].xyz, IN[0].xyzx >365: MOV TEMP[3].w, IMM[2].zzzz >366: DP4 TEMP[6].x, TEMP[6], TEMP[3] >367: MOV TEMP[4].y, TEMP[6].xxxx >368: DP4 TEMP[6].x, TEMP[9], 
TEMP[3] >369: MOV TEMP[4].z, TEMP[6].xxxx >370: DP4 TEMP[4].x, TEMP[2], TEMP[3] >371: DP3 TEMP[6].x, IMM[2].zzzz, TEMP[4].xyzz >372: DP3 TEMP[8].x, CONST[1][38].xyzz, CONST[1][38].xyzz >373: SQRT TEMP[8].x, TEMP[8].xxxx >374: MUL TEMP[9].x, TEMP[8].xxxx, CONST[1][20].wwww >375: FSNE TEMP[10].x, TEMP[9].xxxx, IMM[5].xxxx >376: UIF TEMP[10].xxxx :0 >377: RCP TEMP[9].x, TEMP[9].xxxx >378: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[9].xxxx >379: ELSE :0 >380: SSG TEMP[6].x, TEMP[6].xxxx >381: MUL TEMP[9].x, IMM[5].yyyy, TEMP[6].xxxx >382: ENDIF >383: ADD TEMP[3], -CONST[1][31], CONST[1][32] >384: MUL TEMP[6].x, CONST[1][20].zzzz, CONST[1][45].wwww >385: MUL TEMP[10].x, TEMP[6].xxxx, IMM[5].zzzz >386: MOV_SAT TEMP[10].x, TEMP[10].xxxx >387: FMA TEMP[10], TEMP[10].xxxx, TEMP[3], CONST[1][31] >388: FMA TEMP[9].xy, CONST[1][33].yyyy, TEMP[10].ywww, TEMP[9].xxxx >389: MUL TEMP[3].xy, TEMP[8].xxxx, TEMP[10].xzzz >390: FMA TEMP[8].xy, -IN[3].yzzz, CONST[1][30].xyyy, IMM[2].zzzz >391: ADD TEMP[2].xy, TEMP[9].xyyy, TEMP[8].xyyy >392: SIN TEMP[8].x, TEMP[2].xxxx >393: SIN TEMP[8].y, TEMP[2].yyyy >394: MUL TEMP[2].xy, TEMP[3].xyyy, TEMP[8].xyyy >395: FMA TEMP[2].x, TEMP[6].xxxx, CONST[1][20].yyyy, TEMP[2].xxxx >396: FMA TEMP[3].x, TEMP[2].yyyy, IMM[5].wwww, TEMP[2].xxxx >397: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[3].xxxx >398: FMA TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xxxx, TEMP[4].xyzz >399: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz >400: RSQ TEMP[3].x, TEMP[3].xxxx >401: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[1].xyzz >402: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz >403: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][43].xyzz >404: SQRT TEMP[1].x, TEMP[1].xxxx >405: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz >406: MOV TEMP[0].w, IMM[2].zzzz >407: DP4 TEMP[1].x, CONST[1][36], TEMP[0] >408: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][19].yyyy >409: MOV TEMP[2].z, TEMP[1].xxxx >410: DP4 TEMP[2].x, CONST[1][34], TEMP[0] >411: DP4 TEMP[1].x, CONST[1][35], TEMP[0] >412: MOV TEMP[2].y, TEMP[1].xxxx >413: DP4 TEMP[0].x, CONST[1][37], TEMP[0] >414: MOV TEMP[2].w, TEMP[0].xxxx >415: MOV TEMP[0].xy, IN[2].xyxx >416: MOV OUT[4], IN[3] >417: MOV OUT[2], TEMP[0] >418: MOV OUT[3], TEMP[3] >419: MOV OUT[1], TEMP[5] >420: MOV OUT[0], TEMP[2] >421: END >radeonsi: Compiling shader 337 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 308) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 324) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 328) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 332) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 404) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 480) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 484) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 496) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 500) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 504) > %31 = call float 
@llvm.SI.load.const(<16 x i8> %20, i32 508) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 512) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 516) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 520) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 524) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 532) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 544) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 548) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 552) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 556) > %41 = call float @llvm.SI.load.const(<16 x i8> %20, i32 560) > %42 = call float @llvm.SI.load.const(<16 x i8> %20, i32 564) > %43 = call float @llvm.SI.load.const(<16 x i8> %20, i32 568) > %44 = call float @llvm.SI.load.const(<16 x i8> %20, i32 572) > %45 = call float @llvm.SI.load.const(<16 x i8> %20, i32 576) > %46 = call float @llvm.SI.load.const(<16 x i8> %20, i32 580) > %47 = call float @llvm.SI.load.const(<16 x i8> %20, i32 584) > %48 = call float @llvm.SI.load.const(<16 x i8> %20, i32 588) > %49 = call float @llvm.SI.load.const(<16 x i8> %20, i32 592) > %50 = call float @llvm.SI.load.const(<16 x i8> %20, i32 596) > %51 = call float @llvm.SI.load.const(<16 x i8> %20, i32 600) > %52 = call float @llvm.SI.load.const(<16 x i8> %20, i32 604) > %53 = call float @llvm.SI.load.const(<16 x i8> %20, i32 608) > %54 = call float @llvm.SI.load.const(<16 x i8> %20, i32 612) > %55 = call float @llvm.SI.load.const(<16 x i8> %20, i32 616) > %56 = call float @llvm.SI.load.const(<16 x i8> %20, i32 624) > %57 = call float @llvm.SI.load.const(<16 x i8> %20, i32 628) > %58 = call float @llvm.SI.load.const(<16 x i8> %20, i32 632) > %59 = call float @llvm.SI.load.const(<16 x i8> %20, i32 640) > %60 = call float @llvm.SI.load.const(<16 x i8> %20, i32 644) > %61 = call float @llvm.SI.load.const(<16 x i8> %20, i32 648) > %62 = call float @llvm.SI.load.const(<16 x i8> %20, i32 688) > %63 = call float @llvm.SI.load.const(<16 x i8> %20, i32 692) > %64 = call float @llvm.SI.load.const(<16 x i8> %20, i32 696) > %65 = call float @llvm.SI.load.const(<16 x i8> %20, i32 720) > %66 = call float @llvm.SI.load.const(<16 x i8> %20, i32 724) > %67 = call float @llvm.SI.load.const(<16 x i8> %20, i32 728) > %68 = call float @llvm.SI.load.const(<16 x i8> %20, i32 732) > %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %13) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %14) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %15) > %86 = extractelement <4 
x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %16) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 > %97 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %17) > %98 = extractelement <4 x float> %97, i32 0 > %99 = extractelement <4 x float> %97, i32 1 > %100 = extractelement <4 x float> %97, i32 2 > %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 > %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %18) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = fmul float %56, %66 > %108 = fmul float %57, %66 > %109 = fmul float %58, %66 > %110 = call float @llvm.fma.f32(float %65, float %53, float %107) > %111 = call float @llvm.fma.f32(float %65, float %54, float %108) > %112 = call float @llvm.fma.f32(float %65, float %55, float %109) > %113 = call float @llvm.fma.f32(float %67, float %59, float %110) > %114 = call float @llvm.fma.f32(float %67, float %60, float %111) > %115 = call float @llvm.fma.f32(float %67, float %61, float %112) > %116 = fmul float %106, 0x406FE01000000000 > %117 = fmul float %105, 0x406FE01000000000 > %118 = fmul float %104, 0x406FE01000000000 > %119 = fptosi float %116 to i32 > %120 = fptosi float %117 to i32 > %121 = fptosi float %118 to i32 > %122 = shl i32 %119, 1 > %123 = or i32 %122, 1 > %124 = shl i32 %120, 1 > %125 = or i32 %124, 1 > %126 = shl i32 %121, 1 > %127 = or i32 %126, 1 > %128 = shl i32 %119, 5 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %129) > %131 = fmul float %98, %130 > %132 = shl i32 %120, 5 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %133) > %135 = fmul float %99, %134 > %136 = shl i32 %123, 4 > %137 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %136) > %138 = shl i32 %123, 4 > %139 = or i32 %138, 12 > %140 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %139) > %141 = fmul float %137, %140 > %142 = shl i32 %123, 4 > %143 = or i32 %142, 4 > %144 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %143) > %145 = shl i32 %123, 4 > %146 = or i32 %145, 8 > %147 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %146) > %148 = fsub float -0.000000e+00, %141 > %149 = call float @llvm.fma.f32(float %144, float %147, float %148) > %150 = shl i32 %123, 4 > %151 = or i32 %150, 4 > %152 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %151) > %153 = shl i32 %123, 4 > %154 = or i32 %153, 8 > %155 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %154) > %156 = call float @llvm.fma.f32(float %152, float %155, float %141) > %157 = fmul float %156, %98 > %158 = fmul float %149, %98 > %159 = fmul float %158, 2.000000e+00 > %160 = shl i32 %125, 4 > %161 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %160) > %162 = shl i32 %125, 4 > %163 = or i32 %162, 12 > %164 = call float 
@llvm.SI.load.const(<16 x i8> %70, i32 %163) > %165 = fmul float %161, %164 > %166 = shl i32 %125, 4 > %167 = or i32 %166, 4 > %168 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %167) > %169 = shl i32 %125, 4 > %170 = or i32 %169, 8 > %171 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %170) > %172 = fsub float -0.000000e+00, %165 > %173 = call float @llvm.fma.f32(float %168, float %171, float %172) > %174 = shl i32 %125, 4 > %175 = or i32 %174, 4 > %176 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %175) > %177 = shl i32 %125, 4 > %178 = or i32 %177, 8 > %179 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %178) > %180 = call float @llvm.fma.f32(float %176, float %179, float %165) > %181 = fmul float %180, %99 > %182 = fmul float %181, 2.000000e+00 > %183 = fmul float %173, %99 > %184 = fmul float %183, 2.000000e+00 > %185 = shl i32 %123, 4 > %186 = or i32 %185, 4 > %187 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %186) > %188 = shl i32 %123, 4 > %189 = or i32 %188, 8 > %190 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %189) > %191 = shl i32 %123, 4 > %192 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %191) > %193 = shl i32 %123, 4 > %194 = or i32 %193, 12 > %195 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %194) > %196 = fmul float %190, %195 > %197 = fmul float %190, %192 > %198 = fmul float %187, %195 > %199 = shl i32 %123, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %199) > %201 = shl i32 %123, 4 > %202 = or i32 %201, 4 > %203 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %202) > %204 = call float @llvm.fma.f32(float %200, float %203, float %196) > %205 = fmul float %204, %98 > %206 = fmul float %205, 2.000000e+00 > %207 = shl i32 %123, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %207) > %209 = shl i32 %123, 4 > %210 = or i32 %209, 4 > %211 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %210) > %212 = shl i32 %123, 4 > %213 = or i32 %212, 8 > %214 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %213) > %215 = shl i32 %123, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %215) > %217 = shl i32 %123, 4 > %218 = or i32 %217, 4 > %219 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %218) > %220 = shl i32 %123, 4 > %221 = or i32 %220, 8 > %222 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %221) > %223 = fmul float %208, %216 > %224 = fmul float %211, %219 > %225 = fmul float %214, %222 > %226 = fadd float %225, %224 > %227 = fadd float %225, %223 > %228 = fadd float %224, %223 > %229 = fsub float -0.000000e+00, %226 > %230 = call float @llvm.fma.f32(float %229, float 2.000000e+00, float 1.000000e+00) > %231 = fsub float -0.000000e+00, %227 > %232 = call float @llvm.fma.f32(float %231, float 2.000000e+00, float 1.000000e+00) > %233 = fsub float -0.000000e+00, %228 > %234 = call float @llvm.fma.f32(float %233, float 2.000000e+00, float 1.000000e+00) > %235 = fmul float %98, %232 > %236 = shl i32 %125, 4 > %237 = or i32 %236, 4 > %238 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %237) > %239 = shl i32 %125, 4 > %240 = or i32 %239, 8 > %241 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %240) > %242 = shl i32 %125, 4 > %243 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %242) > %244 = shl i32 %125, 4 > %245 = or i32 %244, 12 > %246 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %245) > %247 = fmul float %241, %246 > %248 = fmul float %241, %243 > %249 = fmul float %238, %246 > %250 = shl i32 %125, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %70, 
i32 %250) > %252 = shl i32 %125, 4 > %253 = or i32 %252, 4 > %254 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %253) > %255 = call float @llvm.fma.f32(float %251, float %254, float %247) > %256 = fmul float %255, %99 > %257 = fmul float %256, 2.000000e+00 > %258 = shl i32 %125, 4 > %259 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %258) > %260 = shl i32 %125, 4 > %261 = or i32 %260, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %261) > %263 = shl i32 %125, 4 > %264 = or i32 %263, 8 > %265 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %264) > %266 = shl i32 %125, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %266) > %268 = shl i32 %125, 4 > %269 = or i32 %268, 4 > %270 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %269) > %271 = shl i32 %125, 4 > %272 = or i32 %271, 8 > %273 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %272) > %274 = fmul float %259, %267 > %275 = fmul float %262, %270 > %276 = fmul float %265, %273 > %277 = fadd float %276, %275 > %278 = fadd float %276, %274 > %279 = fadd float %275, %274 > %280 = fsub float -0.000000e+00, %277 > %281 = call float @llvm.fma.f32(float %280, float 2.000000e+00, float 1.000000e+00) > %282 = fsub float -0.000000e+00, %278 > %283 = call float @llvm.fma.f32(float %282, float 2.000000e+00, float 1.000000e+00) > %284 = fsub float -0.000000e+00, %279 > %285 = call float @llvm.fma.f32(float %284, float 2.000000e+00, float 1.000000e+00) > %286 = fmul float %99, %283 > %287 = fadd float %206, %257 > %288 = fadd float %235, %286 > %289 = fadd float %159, %184 > %290 = fadd float %131, %135 > %291 = shl i32 %121, 5 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %292) > %294 = fmul float %100, %293 > %295 = shl i32 %127, 4 > %296 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %295) > %297 = shl i32 %127, 4 > %298 = or i32 %297, 12 > %299 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %298) > %300 = fmul float %296, %299 > %301 = shl i32 %127, 4 > %302 = or i32 %301, 4 > %303 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %302) > %304 = shl i32 %127, 4 > %305 = or i32 %304, 8 > %306 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %305) > %307 = fsub float -0.000000e+00, %300 > %308 = call float @llvm.fma.f32(float %303, float %306, float %307) > %309 = shl i32 %127, 4 > %310 = or i32 %309, 4 > %311 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %310) > %312 = shl i32 %127, 4 > %313 = or i32 %312, 8 > %314 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %313) > %315 = call float @llvm.fma.f32(float %311, float %314, float %300) > %316 = fmul float %315, %100 > %317 = fmul float %316, 2.000000e+00 > %318 = fmul float %308, %100 > %319 = fmul float %318, 2.000000e+00 > %320 = shl i32 %127, 4 > %321 = or i32 %320, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %321) > %323 = shl i32 %127, 4 > %324 = or i32 %323, 8 > %325 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %324) > %326 = shl i32 %127, 4 > %327 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %326) > %328 = shl i32 %127, 4 > %329 = or i32 %328, 12 > %330 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %329) > %331 = fmul float %325, %330 > %332 = fmul float %325, %327 > %333 = fmul float %322, %330 > %334 = shl i32 %127, 4 > %335 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %334) > %336 = shl i32 %127, 4 > %337 = or i32 %336, 4 > %338 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %337) > %339 = call float @llvm.fma.f32(float %335, 
float %338, float %331) > %340 = fmul float %339, %100 > %341 = fmul float %340, 2.000000e+00 > %342 = shl i32 %127, 4 > %343 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %342) > %344 = shl i32 %127, 4 > %345 = or i32 %344, 4 > %346 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %345) > %347 = shl i32 %127, 4 > %348 = or i32 %347, 8 > %349 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %348) > %350 = shl i32 %127, 4 > %351 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %350) > %352 = shl i32 %127, 4 > %353 = or i32 %352, 4 > %354 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %353) > %355 = shl i32 %127, 4 > %356 = or i32 %355, 8 > %357 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %356) > %358 = fmul float %343, %351 > %359 = fmul float %346, %354 > %360 = fmul float %349, %357 > %361 = fadd float %360, %359 > %362 = fadd float %360, %358 > %363 = fadd float %359, %358 > %364 = fsub float -0.000000e+00, %361 > %365 = call float @llvm.fma.f32(float %364, float 2.000000e+00, float 1.000000e+00) > %366 = fsub float -0.000000e+00, %362 > %367 = call float @llvm.fma.f32(float %366, float 2.000000e+00, float 1.000000e+00) > %368 = fsub float -0.000000e+00, %363 > %369 = call float @llvm.fma.f32(float %368, float 2.000000e+00, float 1.000000e+00) > %370 = fmul float %100, %367 > %371 = fadd float %287, %341 > %372 = fadd float %288, %370 > %373 = fadd float %289, %319 > %374 = fadd float %290, %294 > %375 = fmul float %371, %80 > %376 = fmul float %372, %81 > %377 = fadd float %376, %375 > %378 = fmul float %373, %82 > %379 = fadd float %377, %378 > %380 = shl i32 %123, 4 > %381 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %380) > %382 = shl i32 %123, 4 > %383 = or i32 %382, 8 > %384 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %383) > %385 = fsub float -0.000000e+00, %198 > %386 = call float @llvm.fma.f32(float %381, float %384, float %385) > %387 = fmul float %386, %98 > %388 = fmul float %387, 2.000000e+00 > %389 = fmul float %157, 2.000000e+00 > %390 = shl i32 %125, 4 > %391 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %390) > %392 = shl i32 %125, 4 > %393 = or i32 %392, 8 > %394 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %393) > %395 = fsub float -0.000000e+00, %249 > %396 = call float @llvm.fma.f32(float %391, float %394, float %395) > %397 = fmul float %396, %99 > %398 = fmul float %397, 2.000000e+00 > %399 = fmul float %98, %234 > %400 = fmul float %98, %230 > %401 = fmul float %99, %285 > %402 = fmul float %99, %281 > %403 = shl i32 %119, 5 > %404 = or i32 %403, 8 > %405 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %404) > %406 = fmul float %98, %405 > %407 = shl i32 %120, 5 > %408 = or i32 %407, 8 > %409 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %408) > %410 = fmul float %99, %409 > %411 = fadd float %398, %388 > %412 = fadd float %182, %389 > %413 = fadd float %401, %399 > %414 = fadd float %410, %406 > %415 = shl i32 %127, 4 > %416 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %415) > %417 = shl i32 %127, 4 > %418 = or i32 %417, 8 > %419 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %418) > %420 = fsub float -0.000000e+00, %333 > %421 = call float @llvm.fma.f32(float %416, float %419, float %420) > %422 = fmul float %421, %100 > %423 = fmul float %422, 2.000000e+00 > %424 = fmul float %100, %369 > %425 = fmul float %100, %365 > %426 = shl i32 %121, 5 > %427 = or i32 %426, 8 > %428 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %427) > %429 = fmul float %100, %428 > %430 = fadd float %411, 
%423 > %431 = fadd float %412, %317 > %432 = fadd float %413, %424 > %433 = fadd float %414, %429 > %434 = fmul float %430, %80 > %435 = fmul float %431, %81 > %436 = fadd float %435, %434 > %437 = fmul float %432, %82 > %438 = fadd float %436, %437 > %439 = shl i32 %119, 5 > %440 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %439) > %441 = fmul float %98, %440 > %442 = shl i32 %120, 5 > %443 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %442) > %444 = fmul float %99, %443 > %445 = shl i32 %121, 5 > %446 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %445) > %447 = fmul float %100, %446 > %448 = fadd float %198, %197 > %449 = fmul float %448, %98 > %450 = fmul float %449, 2.000000e+00 > %451 = shl i32 %123, 4 > %452 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %451) > %453 = shl i32 %123, 4 > %454 = or i32 %453, 4 > %455 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %454) > %456 = fsub float -0.000000e+00, %196 > %457 = call float @llvm.fma.f32(float %452, float %455, float %456) > %458 = fmul float %457, %98 > %459 = fmul float %458, 2.000000e+00 > %460 = shl i32 %127, 4 > %461 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %460) > %462 = shl i32 %127, 4 > %463 = or i32 %462, 4 > %464 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %463) > %465 = fsub float -0.000000e+00, %331 > %466 = call float @llvm.fma.f32(float %461, float %464, float %465) > %467 = fadd float %333, %332 > %468 = fmul float %466, %100 > %469 = fmul float %467, %100 > %470 = fmul float %468, 2.000000e+00 > %471 = fmul float %469, 2.000000e+00 > %472 = fadd float %249, %248 > %473 = fmul float %472, %99 > %474 = fmul float %473, 2.000000e+00 > %475 = shl i32 %125, 4 > %476 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %475) > %477 = shl i32 %125, 4 > %478 = or i32 %477, 4 > %479 = call float @llvm.SI.load.const(<16 x i8> %70, i32 %478) > %480 = fsub float -0.000000e+00, %247 > %481 = call float @llvm.fma.f32(float %476, float %479, float %480) > %482 = fmul float %481, %99 > %483 = fmul float %482, 2.000000e+00 > %484 = fadd float %400, %402 > %485 = fadd float %459, %483 > %486 = fadd float %450, %474 > %487 = fadd float %441, %444 > %488 = fadd float %425, %484 > %489 = fadd float %470, %485 > %490 = fadd float %471, %486 > %491 = fadd float %447, %487 > %492 = fmul float %488, %80 > %493 = fmul float %489, %81 > %494 = fadd float %493, %492 > %495 = fmul float %490, %82 > %496 = fadd float %494, %495 > %497 = fmul float %113, %496 > %498 = fmul float %114, %379 > %499 = fadd float %498, %497 > %500 = fmul float %115, %438 > %501 = fadd float %499, %500 > %502 = fadd float %501, 1.000000e+00 > %503 = fmul float %502, %91 > %504 = fmul float %503, %25 > %505 = fmul float %504, 5.000000e-01 > %506 = fmul float %371, %74 > %507 = fmul float %372, %75 > %508 = fadd float %506, %507 > %509 = fmul float %373, %76 > %510 = fadd float %508, %509 > %511 = fadd float %510, %374 > %512 = fmul float %430, %74 > %513 = fmul float %431, %75 > %514 = fadd float %512, %513 > %515 = fmul float %432, %76 > %516 = fadd float %514, %515 > %517 = fadd float %516, %433 > %518 = fmul float %488, %74 > %519 = fmul float %489, %75 > %520 = fadd float %518, %519 > %521 = fmul float %490, %76 > %522 = fadd float %520, %521 > %523 = fadd float %522, %491 > %524 = fadd float %511, %523 > %525 = fadd float %524, %517 > %526 = fmul float %53, %53 > %527 = fmul float %54, %54 > %528 = fadd float %527, %526 > %529 = fmul float %55, %55 > %530 = fadd float %528, %529 > %531 = call float 
@llvm.sqrt.f32(float %530) > %532 = fmul float %531, %24 > %533 = fcmp une float %532, 0.000000e+00 > br i1 %533, label %IF, label %ELSE > >IF: ; preds = %main_body > %534 = fdiv float 1.000000e+00, %532 > %535 = fmul float %525, %534 > br label %ENDIF > >ELSE: ; preds = %main_body > %536 = fcmp ogt float %525, 0.000000e+00 > %537 = select i1 %536, float 1.000000e+00, float %525 > %538 = fcmp oge float %537, 0.000000e+00 > %.op = fmul float %537, 0x4600000000000000 > %539 = select i1 %538, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp36.0 = phi float [ %535, %IF ], [ %539, %ELSE ] > %540 = fsub float %32, %28 > %541 = fsub float %33, %29 > %542 = fsub float %34, %30 > %543 = fsub float %35, %31 > %544 = fmul float %23, %68 > %545 = fmul float %544, 0x3FC5555560000000 > %546 = call float @llvm.AMDGPU.clamp.(float %545, float 0.000000e+00, float 1.000000e+00) > %547 = call float @llvm.fma.f32(float %546, float %540, float %28) > %548 = call float @llvm.fma.f32(float %546, float %541, float %29) > %549 = call float @llvm.fma.f32(float %546, float %542, float %30) > %550 = call float @llvm.fma.f32(float %546, float %543, float %31) > %551 = call float @llvm.fma.f32(float %36, float %548, float %temp36.0) > %552 = call float @llvm.fma.f32(float %36, float %550, float %temp36.0) > %553 = fmul float %531, %547 > %554 = fmul float %531, %549 > %555 = fsub float -0.000000e+00, %92 > %556 = call float @llvm.fma.f32(float %555, float %26, float 1.000000e+00) > %557 = fsub float -0.000000e+00, %93 > %558 = call float @llvm.fma.f32(float %557, float %27, float 1.000000e+00) > %559 = fadd float %551, %556 > %560 = fadd float %552, %558 > %561 = call float @llvm.sin.f32(float %559) > %562 = call float @llvm.sin.f32(float %560) > %563 = fmul float %553, %561 > %564 = fmul float %554, %562 > %565 = call float @llvm.fma.f32(float %544, float %22, float %563) > %566 = call float @llvm.fma.f32(float %564, float 2.500000e-01, float %565) > %567 = fmul float %113, %566 > %568 = fmul float %114, %566 > %569 = fmul float %115, %566 > %570 = call float @llvm.fma.f32(float %567, float %505, float %523) > %571 = call float @llvm.fma.f32(float %568, float %505, float %511) > %572 = call float @llvm.fma.f32(float %569, float %505, float %517) > %573 = fmul float %570, %570 > %574 = fmul float %571, %571 > %575 = fadd float %574, %573 > %576 = fmul float %572, %572 > %577 = fadd float %575, %576 > %578 = call float @llvm.AMDGPU.rsq.clamped.f32(float %577) > %579 = fmul float %578, %570 > %580 = fmul float %578, %571 > %581 = fmul float %578, %572 > %582 = fmul float %523, %523 > %583 = fmul float %511, %511 > %584 = fadd float %583, %582 > %585 = fmul float %517, %517 > %586 = fadd float %584, %585 > %587 = fsub float %62, %523 > %588 = fsub float %63, %511 > %589 = fsub float %64, %517 > %590 = call float @llvm.sqrt.f32(float %586) > %591 = fmul float %590, %579 > %592 = fmul float %590, %580 > %593 = fmul float %590, %581 > %594 = fmul float %45, %591 > %595 = fmul float %46, %592 > %596 = fadd float %594, %595 > %597 = fmul float %47, %593 > %598 = fadd float %596, %597 > %599 = fadd float %598, %48 > %600 = fmul float %599, %21 > %601 = fmul float %37, %591 > %602 = fmul float %38, %592 > %603 = fadd float %601, %602 > %604 = fmul float %39, %593 > %605 = fadd float %603, %604 > %606 = fadd float %605, %40 > %607 = fmul float %41, %591 > %608 = fmul float %42, %592 > %609 = fadd float %607, %608 > %610 = fmul float %43, %593 > %611 = fadd float %609, %610 > %612 = 
fadd float %611, %44 > %613 = fmul float %49, %591 > %614 = fmul float %50, %592 > %615 = fadd float %613, %614 > %616 = fmul float %51, %593 > %617 = fadd float %615, %616 > %618 = fadd float %617, %52 > %619 = bitcast i32 %11 to float > %620 = insertvalue <{ float, float, float }> undef, float %619, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %496, float %379, float %438, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %87, float %593, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %587, float %588, float %589, float %543) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %606, float %612, float %600, float %618) > ret <{ float, float, float }> %620 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL CONST[1][0..45] >DCL CONST[2][0..4095] >DCL TEMP[0..22], LOCAL >DCL ADDR[0] >IMM[0] UINT32 {0, 624, 720, 608} >IMM[1] UINT32 {640, 1, 16, 400} >IMM[2] FLT32 { 255.0020, 2.0000, 1.0000, 0.5000} >IMM[3] INT32 {1, 2, 4, 0} >IMM[4] UINT32 {320, 496, 512, 528} >IMM[5] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.1667, 0.2500} >IMM[6] UINT32 {480, 576, 304, 544} >IMM[7] UINT32 {560, 592, 0, 0} > 0: MUL TEMP[0].xyz, CONST[1][39].xyzz, CONST[1][45].yyyy > 1: FMA TEMP[0].xyz, CONST[1][45].xxxx, CONST[1][38].xyzz, TEMP[0].xyzz > 2: FMA TEMP[1].xyz, CONST[1][45].zzzz, CONST[1][40].xyzz, TEMP[0].xyzz > 3: MUL TEMP[2].xyz, IN[4].zyxx, IMM[2].xxxx > 4: F2I TEMP[3].xyz, TEMP[2].xyzz > 5: SHL TEMP[4].xyz, TEMP[3].xyzz, IMM[3].xxxx > 6: UMAD TEMP[5].xyz, TEMP[3].xyzz, IMM[3].yyyy, IMM[3].xxxx > 7: UMUL TEMP[6].x, TEMP[4].xxxx, IMM[1].zzzz > 8: USHR TEMP[7].x, TEMP[6].xxxx, IMM[3].zzzz > 9: UARL ADDR[0].x, TEMP[7].xxxx > 10: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 11: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].yyyy > 12: MOV TEMP[6].w, TEMP[6].xxxx > 13: UMUL TEMP[7].x, TEMP[4].yyyy, IMM[1].zzzz > 14: USHR TEMP[8].x, TEMP[7].xxxx, IMM[3].zzzz > 15: UARL ADDR[0].x, TEMP[8].xxxx > 16: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 17: MUL TEMP[7].x, IN[3].yyyy, TEMP[7].yyyy > 18: MOV TEMP[7].w, TEMP[7].xxxx > 19: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz > 20: USHR TEMP[9].x, TEMP[8].xxxx, IMM[3].zzzz > 21: UARL ADDR[0].x, TEMP[9].xxxx > 22: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 23: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 24: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 25: UARL ADDR[0].x, 
TEMP[10].xxxx > 26: MOV TEMP[9].w, CONST[2][ADDR[0].x] > 27: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].wwww > 28: UMUL TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz > 29: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 30: UARL ADDR[0].x, TEMP[10].xxxx > 31: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 32: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 33: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 34: UARL ADDR[0].x, TEMP[11].xxxx > 35: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 36: FMA TEMP[9].x, TEMP[9].yyyy, TEMP[10].zzzz, -TEMP[8].xxxx > 37: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 38: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 39: UARL ADDR[0].x, TEMP[11].xxxx > 40: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 41: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 42: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 43: UARL ADDR[0].x, TEMP[12].xxxx > 44: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 45: FMA TEMP[8].x, TEMP[10].yyyy, TEMP[11].zzzz, TEMP[8].xxxx > 46: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].xxxx > 47: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 48: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].yyyy > 49: MOV TEMP[6].z, TEMP[9].xxxx > 50: UMUL TEMP[9].x, TEMP[5].yyyy, IMM[1].zzzz > 51: USHR TEMP[10].x, TEMP[9].xxxx, IMM[3].zzzz > 52: UARL ADDR[0].x, TEMP[10].xxxx > 53: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 54: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 55: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 56: UARL ADDR[0].x, TEMP[11].xxxx > 57: MOV TEMP[10].w, CONST[2][ADDR[0].x] > 58: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].wwww > 59: UMUL TEMP[10].x, TEMP[5].yyyy, IMM[1].zzzz > 60: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 61: UARL ADDR[0].x, TEMP[11].xxxx > 62: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 63: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 64: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 65: UARL ADDR[0].x, TEMP[12].xxxx > 66: MOV TEMP[11].z, CONST[2][ADDR[0].x] > 67: FMA TEMP[10].x, TEMP[10].yyyy, TEMP[11].zzzz, -TEMP[9].xxxx > 68: UMUL TEMP[11].x, TEMP[5].yyyy, IMM[1].zzzz > 69: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 70: UARL ADDR[0].x, TEMP[12].xxxx > 71: MOV TEMP[11].y, CONST[2][ADDR[0].x] > 72: UMUL TEMP[12].x, TEMP[5].yyyy, IMM[1].zzzz > 73: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 74: UARL ADDR[0].x, TEMP[13].xxxx > 75: MOV TEMP[12].z, CONST[2][ADDR[0].x] > 76: FMA TEMP[9].x, TEMP[11].yyyy, TEMP[12].zzzz, TEMP[9].xxxx > 77: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].yyyy > 78: MUL TEMP[9].x, IMM[2].yyyy, TEMP[9].xxxx > 79: MOV TEMP[9].y, TEMP[9].xxxx > 80: MUL TEMP[10].x, TEMP[10].xxxx, IN[3].yyyy > 81: MUL TEMP[10].x, IMM[2].yyyy, TEMP[10].xxxx > 82: MOV TEMP[7].z, TEMP[10].xxxx > 83: UMUL TEMP[10].x, TEMP[5].xxxx, IMM[1].zzzz > 84: USHR TEMP[11].x, TEMP[10].xxxx, IMM[3].zzzz > 85: UARL ADDR[0].x, TEMP[11].xxxx > 86: MOV TEMP[10].yz, CONST[2][ADDR[0].x] > 87: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 88: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 89: UARL ADDR[0].x, TEMP[12].xxxx > 90: MOV TEMP[11].xw, CONST[2][ADDR[0].x] > 91: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww > 92: UMUL TEMP[11].x, TEMP[5].xxxx, IMM[1].zzzz > 93: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz > 94: UARL ADDR[0].x, TEMP[12].xxxx > 95: MOV TEMP[11].x, CONST[2][ADDR[0].x] > 96: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz > 97: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz > 98: UARL ADDR[0].x, TEMP[13].xxxx > 99: MOV TEMP[12].y, CONST[2][ADDR[0].x] >100: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >101: MUL TEMP[11].x, TEMP[11].xxxx, IN[3].xxxx >102: MUL TEMP[6].x, IMM[2].yyyy, TEMP[11].xxxx >103: UMUL TEMP[11].x, 
TEMP[5].xxxx, IMM[1].zzzz >104: USHR TEMP[12].x, TEMP[11].xxxx, IMM[3].zzzz >105: UARL ADDR[0].x, TEMP[12].xxxx >106: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >107: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >108: USHR TEMP[13].x, TEMP[12].xxxx, IMM[3].zzzz >109: UARL ADDR[0].x, TEMP[13].xxxx >110: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >111: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >112: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >113: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[2].yyyy, IMM[2].zzzz >114: MUL TEMP[13].x, IN[3].xxxx, TEMP[12].yyyy >115: MOV TEMP[6].y, TEMP[13].xxxx >116: UMUL TEMP[13].x, TEMP[5].yyyy, IMM[1].zzzz >117: USHR TEMP[14].x, TEMP[13].xxxx, IMM[3].zzzz >118: UARL ADDR[0].x, TEMP[14].xxxx >119: MOV TEMP[13].yz, CONST[2][ADDR[0].x] >120: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >121: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >122: UARL ADDR[0].x, TEMP[15].xxxx >123: MOV TEMP[14].xw, CONST[2][ADDR[0].x] >124: MUL TEMP[13].xyz, TEMP[13].zzyy, TEMP[14].wxww >125: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >126: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >127: UARL ADDR[0].x, TEMP[15].xxxx >128: MOV TEMP[14].x, CONST[2][ADDR[0].x] >129: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >130: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >131: UARL ADDR[0].x, TEMP[16].xxxx >132: MOV TEMP[15].y, CONST[2][ADDR[0].x] >133: FMA TEMP[14].x, TEMP[14].xxxx, TEMP[15].yyyy, TEMP[13].xxxx >134: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].yyyy >135: MUL TEMP[7].x, IMM[2].yyyy, TEMP[14].xxxx >136: UMUL TEMP[14].x, TEMP[5].yyyy, IMM[1].zzzz >137: USHR TEMP[15].x, TEMP[14].xxxx, IMM[3].zzzz >138: UARL ADDR[0].x, TEMP[15].xxxx >139: MOV TEMP[14].xyz, CONST[2][ADDR[0].x] >140: UMUL TEMP[15].x, TEMP[5].yyyy, IMM[1].zzzz >141: USHR TEMP[16].x, TEMP[15].xxxx, IMM[3].zzzz >142: UARL ADDR[0].x, TEMP[16].xxxx >143: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >144: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz >145: ADD TEMP[14].xyz, TEMP[14].zzyy, TEMP[14].yxxx >146: FMA TEMP[15].xyz, -TEMP[14].xyzz, IMM[2].yyyy, IMM[2].zzzz >147: MUL TEMP[16].x, IN[3].yyyy, TEMP[15].yyyy >148: MOV TEMP[7].y, TEMP[16].xxxx >149: ADD TEMP[6], TEMP[6], TEMP[7] >150: UMUL TEMP[16].x, TEMP[4].zzzz, IMM[1].zzzz >151: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >152: UARL ADDR[0].x, TEMP[17].xxxx >153: MOV TEMP[16].y, CONST[2][ADDR[0].x] >154: MUL TEMP[16].x, IN[3].zzzz, TEMP[16].yyyy >155: MOV TEMP[7].w, TEMP[16].xxxx >156: UMUL TEMP[16].x, TEMP[5].zzzz, IMM[1].zzzz >157: USHR TEMP[17].x, TEMP[16].xxxx, IMM[3].zzzz >158: UARL ADDR[0].x, TEMP[17].xxxx >159: MOV TEMP[16].x, CONST[2][ADDR[0].x] >160: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >161: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >162: UARL ADDR[0].x, TEMP[18].xxxx >163: MOV TEMP[17].w, CONST[2][ADDR[0].x] >164: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[17].wwww >165: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >166: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >167: UARL ADDR[0].x, TEMP[18].xxxx >168: MOV TEMP[17].y, CONST[2][ADDR[0].x] >169: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >170: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >171: UARL ADDR[0].x, TEMP[19].xxxx >172: MOV TEMP[18].z, CONST[2][ADDR[0].x] >173: FMA TEMP[17].x, TEMP[17].yyyy, TEMP[18].zzzz, -TEMP[16].xxxx >174: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >175: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >176: UARL ADDR[0].x, TEMP[19].xxxx >177: MOV TEMP[18].y, CONST[2][ADDR[0].x] >178: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >179: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >180: UARL ADDR[0].x, 
TEMP[20].xxxx >181: MOV TEMP[19].z, CONST[2][ADDR[0].x] >182: FMA TEMP[16].x, TEMP[18].yyyy, TEMP[19].zzzz, TEMP[16].xxxx >183: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >184: MUL TEMP[16].x, IMM[2].yyyy, TEMP[16].xxxx >185: MOV TEMP[16].y, TEMP[16].xxxx >186: MUL TEMP[17].x, TEMP[17].xxxx, IN[3].zzzz >187: MUL TEMP[17].x, IMM[2].yyyy, TEMP[17].xxxx >188: MOV TEMP[7].z, TEMP[17].xxxx >189: UMUL TEMP[17].x, TEMP[5].zzzz, IMM[1].zzzz >190: USHR TEMP[18].x, TEMP[17].xxxx, IMM[3].zzzz >191: UARL ADDR[0].x, TEMP[18].xxxx >192: MOV TEMP[17].yz, CONST[2][ADDR[0].x] >193: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >194: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >195: UARL ADDR[0].x, TEMP[19].xxxx >196: MOV TEMP[18].xw, CONST[2][ADDR[0].x] >197: MUL TEMP[17].xyz, TEMP[17].zzyy, TEMP[18].wxww >198: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >199: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >200: UARL ADDR[0].x, TEMP[19].xxxx >201: MOV TEMP[18].x, CONST[2][ADDR[0].x] >202: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >203: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >204: UARL ADDR[0].x, TEMP[20].xxxx >205: MOV TEMP[19].y, CONST[2][ADDR[0].x] >206: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].yyyy, TEMP[17].xxxx >207: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].zzzz >208: MUL TEMP[7].x, IMM[2].yyyy, TEMP[18].xxxx >209: UMUL TEMP[18].x, TEMP[5].zzzz, IMM[1].zzzz >210: USHR TEMP[19].x, TEMP[18].xxxx, IMM[3].zzzz >211: UARL ADDR[0].x, TEMP[19].xxxx >212: MOV TEMP[18].xyz, CONST[2][ADDR[0].x] >213: UMUL TEMP[19].x, TEMP[5].zzzz, IMM[1].zzzz >214: USHR TEMP[20].x, TEMP[19].xxxx, IMM[3].zzzz >215: UARL ADDR[0].x, TEMP[20].xxxx >216: MOV TEMP[19].xyz, CONST[2][ADDR[0].x] >217: MUL TEMP[18].xyz, TEMP[18].xyzz, TEMP[19].xyzz >218: ADD TEMP[18].xyz, TEMP[18].zzyy, TEMP[18].yxxx >219: FMA TEMP[19].xyz, -TEMP[18].xyzz, IMM[2].yyyy, IMM[2].zzzz >220: MUL TEMP[20].x, IN[3].zzzz, TEMP[19].yyyy >221: MOV TEMP[7].y, TEMP[20].xxxx >222: ADD TEMP[6], TEMP[6], TEMP[7] >223: DP3 TEMP[20].x, TEMP[6].xyzz, IN[1].xyzz >224: MOV TEMP[7].y, TEMP[20].xxxx >225: UMUL TEMP[20].x, TEMP[5].xxxx, IMM[1].zzzz >226: USHR TEMP[21].x, TEMP[20].xxxx, IMM[3].zzzz >227: UARL ADDR[0].x, TEMP[21].xxxx >228: MOV TEMP[20].x, CONST[2][ADDR[0].x] >229: UMUL TEMP[21].x, TEMP[5].xxxx, IMM[1].zzzz >230: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >231: UARL ADDR[0].x, TEMP[22].xxxx >232: MOV TEMP[21].z, CONST[2][ADDR[0].x] >233: FMA TEMP[20].x, TEMP[20].xxxx, TEMP[21].zzzz, -TEMP[10].zzzz >234: MUL TEMP[20].x, TEMP[20].xxxx, IN[3].xxxx >235: MUL TEMP[20].x, IMM[2].yyyy, TEMP[20].xxxx >236: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].yyyy >237: MOV TEMP[20].y, TEMP[8].xxxx >238: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >239: USHR TEMP[21].x, TEMP[8].xxxx, IMM[3].zzzz >240: UARL ADDR[0].x, TEMP[21].xxxx >241: MOV TEMP[8].x, CONST[2][ADDR[0].x] >242: UMUL TEMP[21].x, TEMP[5].yyyy, IMM[1].zzzz >243: USHR TEMP[22].x, TEMP[21].xxxx, IMM[3].zzzz >244: UARL ADDR[0].x, TEMP[22].xxxx >245: MOV TEMP[21].z, CONST[2][ADDR[0].x] >246: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[21].zzzz, -TEMP[13].zzzz >247: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy >248: MUL TEMP[9].x, IMM[2].yyyy, TEMP[8].xxxx >249: MUL TEMP[8].x, IN[3].xxxx, TEMP[12].zzzz >250: MOV TEMP[20].z, TEMP[8].xxxx >251: MUL TEMP[11].x, IN[3].xxxx, TEMP[12].xxxx >252: MUL TEMP[8].x, IN[3].yyyy, TEMP[15].zzzz >253: MOV TEMP[9].z, TEMP[8].xxxx >254: MUL TEMP[14].x, IN[3].yyyy, TEMP[15].xxxx >255: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >256: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >257: UARL ADDR[0].x, TEMP[12].xxxx >258: MOV 
TEMP[8].z, CONST[2][ADDR[0].x] >259: MUL TEMP[8].x, IN[3].xxxx, TEMP[8].zzzz >260: MOV TEMP[20].w, TEMP[8].xxxx >261: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >262: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >263: UARL ADDR[0].x, TEMP[12].xxxx >264: MOV TEMP[8].z, CONST[2][ADDR[0].x] >265: MUL TEMP[8].x, IN[3].yyyy, TEMP[8].zzzz >266: MOV TEMP[9].w, TEMP[8].xxxx >267: ADD TEMP[9], TEMP[9], TEMP[20] >268: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >269: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >270: UARL ADDR[0].x, TEMP[12].xxxx >271: MOV TEMP[8].x, CONST[2][ADDR[0].x] >272: UMUL TEMP[12].x, TEMP[5].zzzz, IMM[1].zzzz >273: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >274: UARL ADDR[0].x, TEMP[15].xxxx >275: MOV TEMP[12].z, CONST[2][ADDR[0].x] >276: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].zzzz, -TEMP[17].zzzz >277: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].zzzz >278: MUL TEMP[16].x, IMM[2].yyyy, TEMP[8].xxxx >279: MUL TEMP[8].x, IN[3].zzzz, TEMP[19].zzzz >280: MOV TEMP[16].z, TEMP[8].xxxx >281: MUL TEMP[18].x, IN[3].zzzz, TEMP[19].xxxx >282: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >283: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >284: UARL ADDR[0].x, TEMP[12].xxxx >285: MOV TEMP[8].z, CONST[2][ADDR[0].x] >286: MUL TEMP[8].x, IN[3].zzzz, TEMP[8].zzzz >287: MOV TEMP[16].w, TEMP[8].xxxx >288: ADD TEMP[9], TEMP[9], TEMP[16] >289: DP3 TEMP[8].x, TEMP[9].xyzz, IN[1].xyzz >290: MOV TEMP[7].z, TEMP[8].xxxx >291: UMUL TEMP[8].x, TEMP[4].xxxx, IMM[1].zzzz >292: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >293: UARL ADDR[0].x, TEMP[12].xxxx >294: MOV TEMP[8].x, CONST[2][ADDR[0].x] >295: MUL TEMP[8].x, IN[3].xxxx, TEMP[8].xxxx >296: MOV TEMP[11].w, TEMP[8].xxxx >297: UMUL TEMP[8].x, TEMP[4].yyyy, IMM[1].zzzz >298: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >299: UARL ADDR[0].x, TEMP[12].xxxx >300: MOV TEMP[8].x, CONST[2][ADDR[0].x] >301: MUL TEMP[8].x, IN[3].yyyy, TEMP[8].xxxx >302: MOV TEMP[14].w, TEMP[8].xxxx >303: UMUL TEMP[8].x, TEMP[4].zzzz, IMM[1].zzzz >304: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >305: UARL ADDR[0].x, TEMP[12].xxxx >306: MOV TEMP[8].x, CONST[2][ADDR[0].x] >307: MUL TEMP[8].x, IN[3].zzzz, TEMP[8].xxxx >308: MOV TEMP[18].w, TEMP[8].xxxx >309: ADD TEMP[2].x, TEMP[10].zzzz, TEMP[10].yyyy >310: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].xxxx >311: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >312: MOV TEMP[11].z, TEMP[8].xxxx >313: UMUL TEMP[8].x, TEMP[5].xxxx, IMM[1].zzzz >314: USHR TEMP[12].x, TEMP[8].xxxx, IMM[3].zzzz >315: UARL ADDR[0].x, TEMP[12].xxxx >316: MOV TEMP[8].x, CONST[2][ADDR[0].x] >317: UMUL TEMP[12].x, TEMP[5].xxxx, IMM[1].zzzz >318: USHR TEMP[15].x, TEMP[12].xxxx, IMM[3].zzzz >319: UARL ADDR[0].x, TEMP[15].xxxx >320: MOV TEMP[12].y, CONST[2][ADDR[0].x] >321: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[12].yyyy, -TEMP[10].xxxx >322: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].xxxx >323: MUL TEMP[8].x, IMM[2].yyyy, TEMP[8].xxxx >324: MOV TEMP[11].y, TEMP[8].xxxx >325: UMUL TEMP[8].x, TEMP[5].zzzz, IMM[1].zzzz >326: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >327: UARL ADDR[0].x, TEMP[10].xxxx >328: MOV TEMP[8].x, CONST[2][ADDR[0].x] >329: UMUL TEMP[10].x, TEMP[5].zzzz, IMM[1].zzzz >330: USHR TEMP[12].x, TEMP[10].xxxx, IMM[3].zzzz >331: UARL ADDR[0].x, TEMP[12].xxxx >332: MOV TEMP[10].y, CONST[2][ADDR[0].x] >333: FMA TEMP[2].x, TEMP[8].xxxx, TEMP[10].yyyy, -TEMP[17].xxxx >334: ADD TEMP[8].x, TEMP[17].zzzz, TEMP[17].yyyy >335: MOV TEMP[2].y, TEMP[8].xxxx >336: MUL TEMP[2].xy, TEMP[2].xyyy, IN[3].zzzz >337: MUL TEMP[8].xy, IMM[2].yyyy, TEMP[2].xyyy >338: MOV TEMP[18].yz, TEMP[8].yxyy >339: ADD TEMP[2].x, 
TEMP[13].zzzz, TEMP[13].yyyy >340: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >341: MUL TEMP[8].x, IMM[2].yyyy, TEMP[2].xxxx >342: MOV TEMP[14].z, TEMP[8].xxxx >343: UMUL TEMP[8].x, TEMP[5].yyyy, IMM[1].zzzz >344: USHR TEMP[10].x, TEMP[8].xxxx, IMM[3].zzzz >345: UARL ADDR[0].x, TEMP[10].xxxx >346: MOV TEMP[8].x, CONST[2][ADDR[0].x] >347: UMUL TEMP[5].x, TEMP[5].yyyy, IMM[1].zzzz >348: USHR TEMP[10].x, TEMP[5].xxxx, IMM[3].zzzz >349: UARL ADDR[0].x, TEMP[10].xxxx >350: MOV TEMP[5].y, CONST[2][ADDR[0].x] >351: FMA TEMP[5].x, TEMP[8].xxxx, TEMP[5].yyyy, -TEMP[13].xxxx >352: MUL TEMP[5].x, TEMP[5].xxxx, IN[3].yyyy >353: MUL TEMP[5].x, IMM[2].yyyy, TEMP[5].xxxx >354: MOV TEMP[14].y, TEMP[5].xxxx >355: ADD TEMP[2], TEMP[11], TEMP[14] >356: ADD TEMP[2], TEMP[18], TEMP[2] >357: DP3 TEMP[7].x, TEMP[2].xyzz, IN[1].xyzz >358: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[7].xyzz >359: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz >360: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].xxxx >361: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][25].yyyy >362: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww >363: MOV TEMP[3].xyz, IN[0].xyzx >364: MOV TEMP[3].w, IMM[2].zzzz >365: DP4 TEMP[6].x, TEMP[6], TEMP[3] >366: MOV TEMP[4].y, TEMP[6].xxxx >367: DP4 TEMP[6].x, TEMP[9], TEMP[3] >368: MOV TEMP[4].z, TEMP[6].xxxx >369: DP4 TEMP[4].x, TEMP[2], TEMP[3] >370: DP3 TEMP[6].x, IMM[2].zzzz, TEMP[4].xyzz >371: DP3 TEMP[7].x, CONST[1][38].xyzz, CONST[1][38].xyzz >372: SQRT TEMP[7].x, TEMP[7].xxxx >373: MUL TEMP[8].x, TEMP[7].xxxx, CONST[1][20].wwww >374: FSNE TEMP[9].x, TEMP[8].xxxx, IMM[5].xxxx >375: UIF TEMP[9].xxxx :0 >376: RCP TEMP[8].x, TEMP[8].xxxx >377: MUL TEMP[8].x, TEMP[6].xxxx, TEMP[8].xxxx >378: ELSE :0 >379: SSG TEMP[6].x, TEMP[6].xxxx >380: MUL TEMP[8].x, IMM[5].yyyy, TEMP[6].xxxx >381: ENDIF >382: ADD TEMP[3], -CONST[1][31], CONST[1][32] >383: MUL TEMP[6].x, CONST[1][20].zzzz, CONST[1][45].wwww >384: MUL TEMP[9].x, TEMP[6].xxxx, IMM[5].zzzz >385: MOV_SAT TEMP[9].x, TEMP[9].xxxx >386: FMA TEMP[9], TEMP[9].xxxx, TEMP[3], CONST[1][31] >387: FMA TEMP[8].xy, CONST[1][33].yyyy, TEMP[9].ywww, TEMP[8].xxxx >388: MUL TEMP[3].xy, TEMP[7].xxxx, TEMP[9].xzzz >389: FMA TEMP[7].xy, -IN[2].yzzz, CONST[1][30].xyyy, IMM[2].zzzz >390: ADD TEMP[2].xy, TEMP[8].xyyy, TEMP[7].xyyy >391: SIN TEMP[7].x, TEMP[2].xxxx >392: SIN TEMP[7].y, TEMP[2].yyyy >393: MUL TEMP[2].xy, TEMP[3].xyyy, TEMP[7].xyyy >394: FMA TEMP[2].x, TEMP[6].xxxx, CONST[1][20].yyyy, TEMP[2].xxxx >395: FMA TEMP[3].x, TEMP[2].yyyy, IMM[5].wwww, TEMP[2].xxxx >396: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[3].xxxx >397: FMA TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xxxx, TEMP[4].xyzz >398: DP3 TEMP[3].x, TEMP[4].xyzz, TEMP[4].xyzz >399: SQRT TEMP[3].x, TEMP[3].xxxx >400: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz >401: RSQ TEMP[2].x, TEMP[2].xxxx >402: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xxxx >403: MUL TEMP[0].xyz, TEMP[3].xxxx, TEMP[0].xyzz >404: MOV TEMP[0].w, IMM[2].zzzz >405: DP4 TEMP[1].x, CONST[1][36], TEMP[0] >406: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][19].yyyy >407: MOV TEMP[1].z, TEMP[1].xxxx >408: DP4 TEMP[1].x, CONST[1][34], TEMP[0] >409: DP4 TEMP[2].x, CONST[1][35], TEMP[0] >410: MOV TEMP[1].y, TEMP[2].xxxx >411: DP4 TEMP[0].x, CONST[1][37], TEMP[0] >412: MOV TEMP[1].w, TEMP[0].xxxx >413: MOV OUT[0], TEMP[1] >414: END >radeonsi: Compiling shader 338 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* 
byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 496) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 500) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 524) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 544) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 548) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 552) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 556) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 560) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 564) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 568) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 572) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 576) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 580) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 584) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 588) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 592) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 596) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 600) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 604) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 608) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 612) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 616) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 624) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 628) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 632) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 640) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 644) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 648) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 720) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 724) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 728) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 732) > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 > %69 = call <4 x float> 
@llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %13) > %70 = extractelement <4 x float> %69, i32 0 > %71 = extractelement <4 x float> %69, i32 1 > %72 = extractelement <4 x float> %69, i32 2 > %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 > %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %14) > %76 = extractelement <4 x float> %75, i32 0 > %77 = extractelement <4 x float> %75, i32 1 > %78 = extractelement <4 x float> %75, i32 2 > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %15) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %16) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = extractelement <4 x float> %87, i32 2 > %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 > %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %17) > %94 = extractelement <4 x float> %93, i32 0 > %95 = extractelement <4 x float> %93, i32 1 > %96 = extractelement <4 x float> %93, i32 2 > %97 = fmul float %55, %62 > %98 = fmul float %56, %62 > %99 = fmul float %57, %62 > %100 = call float @llvm.fma.f32(float %61, float %52, float %97) > %101 = call float @llvm.fma.f32(float %61, float %53, float %98) > %102 = call float @llvm.fma.f32(float %61, float %54, float %99) > %103 = call float @llvm.fma.f32(float %63, float %58, float %100) > %104 = call float @llvm.fma.f32(float %63, float %59, float %101) > %105 = call float @llvm.fma.f32(float %63, float %60, float %102) > %106 = fmul float %96, 0x406FE01000000000 > %107 = fmul float %95, 0x406FE01000000000 > %108 = fmul float %94, 0x406FE01000000000 > %109 = fptosi float %106 to i32 > %110 = fptosi float %107 to i32 > %111 = fptosi float %108 to i32 > %112 = shl i32 %109, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %110, 1 > %115 = or i32 %114, 1 > %116 = shl i32 %111, 1 > %117 = or i32 %116, 1 > %118 = shl i32 %109, 5 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %119) > %121 = fmul float %88, %120 > %122 = shl i32 %110, 5 > %123 = or i32 %122, 4 > %124 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %123) > %125 = fmul float %89, %124 > %126 = shl i32 %113, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %126) > %128 = shl i32 %113, 4 > %129 = or i32 %128, 12 > %130 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %129) > %131 = fmul float %127, %130 > %132 = shl i32 %113, 4 > %133 = or i32 %132, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %133) > %135 = shl i32 %113, 4 > %136 = or i32 %135, 8 > %137 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %136) > %138 = fsub float -0.000000e+00, %131 > %139 = call float @llvm.fma.f32(float %134, float %137, float %138) > %140 = shl i32 %113, 4 > %141 = or i32 %140, 4 > %142 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %141) > %143 = 
shl i32 %113, 4 > %144 = or i32 %143, 8 > %145 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %144) > %146 = call float @llvm.fma.f32(float %142, float %145, float %131) > %147 = fmul float %146, %88 > %148 = fmul float %139, %88 > %149 = fmul float %148, 2.000000e+00 > %150 = shl i32 %115, 4 > %151 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %150) > %152 = shl i32 %115, 4 > %153 = or i32 %152, 12 > %154 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %153) > %155 = fmul float %151, %154 > %156 = shl i32 %115, 4 > %157 = or i32 %156, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %157) > %159 = shl i32 %115, 4 > %160 = or i32 %159, 8 > %161 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %160) > %162 = fsub float -0.000000e+00, %155 > %163 = call float @llvm.fma.f32(float %158, float %161, float %162) > %164 = shl i32 %115, 4 > %165 = or i32 %164, 4 > %166 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %165) > %167 = shl i32 %115, 4 > %168 = or i32 %167, 8 > %169 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %168) > %170 = call float @llvm.fma.f32(float %166, float %169, float %155) > %171 = fmul float %170, %89 > %172 = fmul float %171, 2.000000e+00 > %173 = fmul float %163, %89 > %174 = fmul float %173, 2.000000e+00 > %175 = shl i32 %113, 4 > %176 = or i32 %175, 4 > %177 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %176) > %178 = shl i32 %113, 4 > %179 = or i32 %178, 8 > %180 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %179) > %181 = shl i32 %113, 4 > %182 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %181) > %183 = shl i32 %113, 4 > %184 = or i32 %183, 12 > %185 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %184) > %186 = fmul float %180, %185 > %187 = fmul float %180, %182 > %188 = fmul float %177, %185 > %189 = shl i32 %113, 4 > %190 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %189) > %191 = shl i32 %113, 4 > %192 = or i32 %191, 4 > %193 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %192) > %194 = call float @llvm.fma.f32(float %190, float %193, float %186) > %195 = fmul float %194, %88 > %196 = fmul float %195, 2.000000e+00 > %197 = shl i32 %113, 4 > %198 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %197) > %199 = shl i32 %113, 4 > %200 = or i32 %199, 4 > %201 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %200) > %202 = shl i32 %113, 4 > %203 = or i32 %202, 8 > %204 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %203) > %205 = shl i32 %113, 4 > %206 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %205) > %207 = shl i32 %113, 4 > %208 = or i32 %207, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %208) > %210 = shl i32 %113, 4 > %211 = or i32 %210, 8 > %212 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %211) > %213 = fmul float %198, %206 > %214 = fmul float %201, %209 > %215 = fmul float %204, %212 > %216 = fadd float %215, %214 > %217 = fadd float %215, %213 > %218 = fadd float %214, %213 > %219 = fsub float -0.000000e+00, %216 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fsub float -0.000000e+00, %217 > %222 = call float @llvm.fma.f32(float %221, float 2.000000e+00, float 1.000000e+00) > %223 = fsub float -0.000000e+00, %218 > %224 = call float @llvm.fma.f32(float %223, float 2.000000e+00, float 1.000000e+00) > %225 = fmul float %88, %222 > %226 = shl i32 %115, 4 > %227 = or i32 %226, 4 > %228 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %227) > %229 = shl i32 %115, 4 > %230 = or i32 %229, 8 > %231 = 
call float @llvm.SI.load.const(<16 x i8> %66, i32 %230) > %232 = shl i32 %115, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %232) > %234 = shl i32 %115, 4 > %235 = or i32 %234, 12 > %236 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %235) > %237 = fmul float %231, %236 > %238 = fmul float %231, %233 > %239 = fmul float %228, %236 > %240 = shl i32 %115, 4 > %241 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %240) > %242 = shl i32 %115, 4 > %243 = or i32 %242, 4 > %244 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %243) > %245 = call float @llvm.fma.f32(float %241, float %244, float %237) > %246 = fmul float %245, %89 > %247 = fmul float %246, 2.000000e+00 > %248 = shl i32 %115, 4 > %249 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %248) > %250 = shl i32 %115, 4 > %251 = or i32 %250, 4 > %252 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %251) > %253 = shl i32 %115, 4 > %254 = or i32 %253, 8 > %255 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %254) > %256 = shl i32 %115, 4 > %257 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %256) > %258 = shl i32 %115, 4 > %259 = or i32 %258, 4 > %260 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %259) > %261 = shl i32 %115, 4 > %262 = or i32 %261, 8 > %263 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %262) > %264 = fmul float %249, %257 > %265 = fmul float %252, %260 > %266 = fmul float %255, %263 > %267 = fadd float %266, %265 > %268 = fadd float %266, %264 > %269 = fadd float %265, %264 > %270 = fsub float -0.000000e+00, %267 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fsub float -0.000000e+00, %268 > %273 = call float @llvm.fma.f32(float %272, float 2.000000e+00, float 1.000000e+00) > %274 = fsub float -0.000000e+00, %269 > %275 = call float @llvm.fma.f32(float %274, float 2.000000e+00, float 1.000000e+00) > %276 = fmul float %89, %273 > %277 = fadd float %196, %247 > %278 = fadd float %225, %276 > %279 = fadd float %149, %174 > %280 = fadd float %121, %125 > %281 = shl i32 %111, 5 > %282 = or i32 %281, 4 > %283 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %282) > %284 = fmul float %90, %283 > %285 = shl i32 %117, 4 > %286 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %285) > %287 = shl i32 %117, 4 > %288 = or i32 %287, 12 > %289 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %288) > %290 = fmul float %286, %289 > %291 = shl i32 %117, 4 > %292 = or i32 %291, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %292) > %294 = shl i32 %117, 4 > %295 = or i32 %294, 8 > %296 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %295) > %297 = fsub float -0.000000e+00, %290 > %298 = call float @llvm.fma.f32(float %293, float %296, float %297) > %299 = shl i32 %117, 4 > %300 = or i32 %299, 4 > %301 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %300) > %302 = shl i32 %117, 4 > %303 = or i32 %302, 8 > %304 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %303) > %305 = call float @llvm.fma.f32(float %301, float %304, float %290) > %306 = fmul float %305, %90 > %307 = fmul float %306, 2.000000e+00 > %308 = fmul float %298, %90 > %309 = fmul float %308, 2.000000e+00 > %310 = shl i32 %117, 4 > %311 = or i32 %310, 4 > %312 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %311) > %313 = shl i32 %117, 4 > %314 = or i32 %313, 8 > %315 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %314) > %316 = shl i32 %117, 4 > %317 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %316) > %318 = shl i32 %117, 4 > %319 
= or i32 %318, 12 > %320 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %319) > %321 = fmul float %315, %320 > %322 = fmul float %315, %317 > %323 = fmul float %312, %320 > %324 = shl i32 %117, 4 > %325 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %324) > %326 = shl i32 %117, 4 > %327 = or i32 %326, 4 > %328 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %327) > %329 = call float @llvm.fma.f32(float %325, float %328, float %321) > %330 = fmul float %329, %90 > %331 = fmul float %330, 2.000000e+00 > %332 = shl i32 %117, 4 > %333 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %332) > %334 = shl i32 %117, 4 > %335 = or i32 %334, 4 > %336 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %335) > %337 = shl i32 %117, 4 > %338 = or i32 %337, 8 > %339 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %338) > %340 = shl i32 %117, 4 > %341 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %340) > %342 = shl i32 %117, 4 > %343 = or i32 %342, 4 > %344 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %343) > %345 = shl i32 %117, 4 > %346 = or i32 %345, 8 > %347 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %346) > %348 = fmul float %333, %341 > %349 = fmul float %336, %344 > %350 = fmul float %339, %347 > %351 = fadd float %350, %349 > %352 = fadd float %350, %348 > %353 = fadd float %349, %348 > %354 = fsub float -0.000000e+00, %351 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fsub float -0.000000e+00, %352 > %357 = call float @llvm.fma.f32(float %356, float 2.000000e+00, float 1.000000e+00) > %358 = fsub float -0.000000e+00, %353 > %359 = call float @llvm.fma.f32(float %358, float 2.000000e+00, float 1.000000e+00) > %360 = fmul float %90, %357 > %361 = fadd float %277, %331 > %362 = fadd float %278, %360 > %363 = fadd float %279, %309 > %364 = fadd float %280, %284 > %365 = fmul float %361, %76 > %366 = fmul float %362, %77 > %367 = fadd float %366, %365 > %368 = fmul float %363, %78 > %369 = fadd float %367, %368 > %370 = shl i32 %113, 4 > %371 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %370) > %372 = shl i32 %113, 4 > %373 = or i32 %372, 8 > %374 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %373) > %375 = fsub float -0.000000e+00, %188 > %376 = call float @llvm.fma.f32(float %371, float %374, float %375) > %377 = fmul float %376, %88 > %378 = fmul float %377, 2.000000e+00 > %379 = fmul float %147, 2.000000e+00 > %380 = shl i32 %115, 4 > %381 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %380) > %382 = shl i32 %115, 4 > %383 = or i32 %382, 8 > %384 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %383) > %385 = fsub float -0.000000e+00, %239 > %386 = call float @llvm.fma.f32(float %381, float %384, float %385) > %387 = fmul float %386, %89 > %388 = fmul float %387, 2.000000e+00 > %389 = fmul float %88, %224 > %390 = fmul float %88, %220 > %391 = fmul float %89, %275 > %392 = fmul float %89, %271 > %393 = shl i32 %109, 5 > %394 = or i32 %393, 8 > %395 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %394) > %396 = fmul float %88, %395 > %397 = shl i32 %110, 5 > %398 = or i32 %397, 8 > %399 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %398) > %400 = fmul float %89, %399 > %401 = fadd float %388, %378 > %402 = fadd float %172, %379 > %403 = fadd float %391, %389 > %404 = fadd float %400, %396 > %405 = shl i32 %117, 4 > %406 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %405) > %407 = shl i32 %117, 4 > %408 = or i32 %407, 8 > %409 = call float @llvm.SI.load.const(<16 x i8> 
%66, i32 %408) > %410 = fsub float -0.000000e+00, %323 > %411 = call float @llvm.fma.f32(float %406, float %409, float %410) > %412 = fmul float %411, %90 > %413 = fmul float %412, 2.000000e+00 > %414 = fmul float %90, %359 > %415 = fmul float %90, %355 > %416 = shl i32 %111, 5 > %417 = or i32 %416, 8 > %418 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %417) > %419 = fmul float %90, %418 > %420 = fadd float %401, %413 > %421 = fadd float %402, %307 > %422 = fadd float %403, %414 > %423 = fadd float %404, %419 > %424 = fmul float %420, %76 > %425 = fmul float %421, %77 > %426 = fadd float %425, %424 > %427 = fmul float %422, %78 > %428 = fadd float %426, %427 > %429 = shl i32 %109, 5 > %430 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %429) > %431 = fmul float %88, %430 > %432 = shl i32 %110, 5 > %433 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %432) > %434 = fmul float %89, %433 > %435 = shl i32 %111, 5 > %436 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %435) > %437 = fmul float %90, %436 > %438 = fadd float %188, %187 > %439 = fmul float %438, %88 > %440 = fmul float %439, 2.000000e+00 > %441 = shl i32 %113, 4 > %442 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %441) > %443 = shl i32 %113, 4 > %444 = or i32 %443, 4 > %445 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %444) > %446 = fsub float -0.000000e+00, %186 > %447 = call float @llvm.fma.f32(float %442, float %445, float %446) > %448 = fmul float %447, %88 > %449 = fmul float %448, 2.000000e+00 > %450 = shl i32 %117, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %450) > %452 = shl i32 %117, 4 > %453 = or i32 %452, 4 > %454 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %453) > %455 = fsub float -0.000000e+00, %321 > %456 = call float @llvm.fma.f32(float %451, float %454, float %455) > %457 = fadd float %323, %322 > %458 = fmul float %456, %90 > %459 = fmul float %457, %90 > %460 = fmul float %458, 2.000000e+00 > %461 = fmul float %459, 2.000000e+00 > %462 = fadd float %239, %238 > %463 = fmul float %462, %89 > %464 = fmul float %463, 2.000000e+00 > %465 = shl i32 %115, 4 > %466 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %465) > %467 = shl i32 %115, 4 > %468 = or i32 %467, 4 > %469 = call float @llvm.SI.load.const(<16 x i8> %66, i32 %468) > %470 = fsub float -0.000000e+00, %237 > %471 = call float @llvm.fma.f32(float %466, float %469, float %470) > %472 = fmul float %471, %89 > %473 = fmul float %472, 2.000000e+00 > %474 = fadd float %390, %392 > %475 = fadd float %449, %473 > %476 = fadd float %440, %464 > %477 = fadd float %431, %434 > %478 = fadd float %415, %474 > %479 = fadd float %460, %475 > %480 = fadd float %461, %476 > %481 = fadd float %437, %477 > %482 = fmul float %478, %76 > %483 = fmul float %479, %77 > %484 = fadd float %483, %482 > %485 = fmul float %480, %78 > %486 = fadd float %484, %485 > %487 = fmul float %103, %486 > %488 = fmul float %104, %369 > %489 = fadd float %488, %487 > %490 = fmul float %105, %428 > %491 = fadd float %489, %490 > %492 = fadd float %491, 1.000000e+00 > %493 = fmul float %492, %82 > %494 = fmul float %493, %24 > %495 = fmul float %494, 5.000000e-01 > %496 = fmul float %361, %70 > %497 = fmul float %362, %71 > %498 = fadd float %496, %497 > %499 = fmul float %363, %72 > %500 = fadd float %498, %499 > %501 = fadd float %500, %364 > %502 = fmul float %420, %70 > %503 = fmul float %421, %71 > %504 = fadd float %502, %503 > %505 = fmul float %422, %72 > %506 = fadd float %504, %505 > %507 = fadd float %506, %423 > %508 = 
fmul float %478, %70 > %509 = fmul float %479, %71 > %510 = fadd float %508, %509 > %511 = fmul float %480, %72 > %512 = fadd float %510, %511 > %513 = fadd float %512, %481 > %514 = fadd float %501, %513 > %515 = fadd float %514, %507 > %516 = fmul float %52, %52 > %517 = fmul float %53, %53 > %518 = fadd float %517, %516 > %519 = fmul float %54, %54 > %520 = fadd float %518, %519 > %521 = call float @llvm.sqrt.f32(float %520) > %522 = fmul float %521, %23 > %523 = fcmp une float %522, 0.000000e+00 > br i1 %523, label %IF, label %ELSE > >IF: ; preds = %main_body > %524 = fdiv float 1.000000e+00, %522 > %525 = fmul float %515, %524 > br label %ENDIF > >ELSE: ; preds = %main_body > %526 = fcmp ogt float %515, 0.000000e+00 > %527 = select i1 %526, float 1.000000e+00, float %515 > %528 = fcmp oge float %527, 0.000000e+00 > %.op = fmul float %527, 0x4600000000000000 > %529 = select i1 %528, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp32.0 = phi float [ %525, %IF ], [ %529, %ELSE ] > %530 = fsub float %31, %27 > %531 = fsub float %32, %28 > %532 = fsub float %33, %29 > %533 = fsub float %34, %30 > %534 = fmul float %22, %64 > %535 = fmul float %534, 0x3FC5555560000000 > %536 = call float @llvm.AMDGPU.clamp.(float %535, float 0.000000e+00, float 1.000000e+00) > %537 = call float @llvm.fma.f32(float %536, float %530, float %27) > %538 = call float @llvm.fma.f32(float %536, float %531, float %28) > %539 = call float @llvm.fma.f32(float %536, float %532, float %29) > %540 = call float @llvm.fma.f32(float %536, float %533, float %30) > %541 = call float @llvm.fma.f32(float %35, float %538, float %temp32.0) > %542 = call float @llvm.fma.f32(float %35, float %540, float %temp32.0) > %543 = fmul float %521, %537 > %544 = fmul float %521, %539 > %545 = fsub float -0.000000e+00, %83 > %546 = call float @llvm.fma.f32(float %545, float %25, float 1.000000e+00) > %547 = fsub float -0.000000e+00, %84 > %548 = call float @llvm.fma.f32(float %547, float %26, float 1.000000e+00) > %549 = fadd float %541, %546 > %550 = fadd float %542, %548 > %551 = call float @llvm.sin.f32(float %549) > %552 = call float @llvm.sin.f32(float %550) > %553 = fmul float %543, %551 > %554 = fmul float %544, %552 > %555 = call float @llvm.fma.f32(float %534, float %21, float %553) > %556 = call float @llvm.fma.f32(float %554, float 2.500000e-01, float %555) > %557 = fmul float %103, %556 > %558 = fmul float %104, %556 > %559 = fmul float %105, %556 > %560 = call float @llvm.fma.f32(float %557, float %495, float %513) > %561 = call float @llvm.fma.f32(float %558, float %495, float %501) > %562 = call float @llvm.fma.f32(float %559, float %495, float %507) > %563 = fmul float %513, %513 > %564 = fmul float %501, %501 > %565 = fadd float %564, %563 > %566 = fmul float %507, %507 > %567 = fadd float %565, %566 > %568 = call float @llvm.sqrt.f32(float %567) > %569 = fmul float %560, %560 > %570 = fmul float %561, %561 > %571 = fadd float %570, %569 > %572 = fmul float %562, %562 > %573 = fadd float %571, %572 > %574 = call float @llvm.AMDGPU.rsq.clamped.f32(float %573) > %575 = fmul float %560, %574 > %576 = fmul float %561, %574 > %577 = fmul float %562, %574 > %578 = fmul float %568, %575 > %579 = fmul float %568, %576 > %580 = fmul float %568, %577 > %581 = fmul float %44, %578 > %582 = fmul float %45, %579 > %583 = fadd float %581, %582 > %584 = fmul float %46, %580 > %585 = fadd float %583, %584 > %586 = fadd float %585, %47 > %587 = fmul float %586, %20 > %588 = fmul float %36, %578 > 
%589 = fmul float %37, %579 > %590 = fadd float %588, %589 > %591 = fmul float %38, %580 > %592 = fadd float %590, %591 > %593 = fadd float %592, %39 > %594 = fmul float %40, %578 > %595 = fmul float %41, %579 > %596 = fadd float %594, %595 > %597 = fmul float %42, %580 > %598 = fadd float %596, %597 > %599 = fadd float %598, %43 > %600 = fmul float %48, %578 > %601 = fmul float %49, %579 > %602 = fadd float %600, %601 > %603 = fmul float %50, %580 > %604 = fadd float %602, %603 > %605 = fadd float %604, %51 > %606 = bitcast i32 %11 to float > %607 = insertvalue <{ float, float, float }> undef, float %606, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %593, float %599, float %587, float %605) > ret <{ float, float, float }> %607 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL CONST[1][0..37] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 464} >IMM[3] UINT32 {288, 432, 448, 480} >IMM[4] UINT32 {576, 592, 496, 512} >IMM[5] UINT32 {528, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL 
TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL 
TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: 
MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL 
TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV 
TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][29], TEMP[18] >356: ADD TEMP[3].x, TEMP[3].xxxx, CONST[1][18].yyyy >357: MOV TEMP[1].z, TEMP[3].xxxx >358: DP4 TEMP[1].x, CONST[1][27], TEMP[18] >359: DP4 TEMP[3].x, CONST[1][28], TEMP[18] >360: MOV TEMP[1].y, TEMP[3].xxxx >361: DP4 TEMP[3].x, CONST[1][30], TEMP[18] >362: MOV TEMP[1].w, TEMP[3].xxxx >363: ADD TEMP[2].xyz, -TEMP[18].xyzz, CONST[1][36].xyzz >364: MOV TEMP[3], TEMP[1] >365: MOV TEMP[5].xy, IN[2].xyxx >366: DP3 TEMP[6].x, CONST[1][31].xyzz, TEMP[2].xyzz >367: DP3 TEMP[8].x, CONST[1][32].xyzz, TEMP[2].xyzz >368: MOV TEMP[6].y, TEMP[8].xxxx >369: DP3 TEMP[8].x, CONST[1][33].xyzz, TEMP[2].xyzz >370: MOV TEMP[6].z, TEMP[8].xxxx >371: DP3 TEMP[8].x, TEMP[4].xyzz, IN[3].xyzz >372: MOV TEMP[1].y, TEMP[8].xxxx >373: DP3 TEMP[8].x, TEMP[7].xyzz, IN[3].xyzz >374: MOV TEMP[1].z, TEMP[8].xxxx >375: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >376: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz >377: RSQ TEMP[8].x, TEMP[8].xxxx >378: MUL TEMP[1].xyz, TEMP[8].xxxx, TEMP[1].xyzz >379: DP3 TEMP[8].x, CONST[1][31].xyzz, TEMP[1].xyzz >380: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >381: MOV TEMP[2].y, TEMP[9].xxxx >382: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >383: MOV TEMP[4].y, TEMP[9].xxxx >384: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >385: MOV TEMP[2].z, TEMP[9].xxxx >386: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >387: MOV TEMP[4].z, TEMP[7].xxxx >388: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >389: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >390: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >391: RSQ TEMP[7].x, TEMP[0].xxxx >392: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >393: DP3 TEMP[7].x, CONST[1][31].xyzz, TEMP[0].xyzz >394: MOV TEMP[8].y, TEMP[7].xxxx >395: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >396: RSQ TEMP[7].x, TEMP[7].xxxx >397: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >398: DP3 TEMP[4].x, CONST[1][31].xyzz, TEMP[2].xyzz >399: MOV TEMP[8].z, TEMP[4].xxxx >400: DP3 TEMP[4].x, CONST[1][32].xyzz, TEMP[1].xyzz >401: DP3 TEMP[1].x, CONST[1][33].xyzz, TEMP[1].xyzz >402: DP3 TEMP[7].x, CONST[1][32].xyzz, TEMP[0].xyzz >403: MOV TEMP[4].y, TEMP[7].xxxx >404: DP3 TEMP[0].x, CONST[1][33].xyzz, TEMP[0].xyzz >405: MOV TEMP[1].y, TEMP[0].xxxx >406: DP3 TEMP[0].x, CONST[1][32].xyzz, TEMP[2].xyzz >407: MOV TEMP[4].z, TEMP[0].xxxx >408: DP3 TEMP[0].x, CONST[1][33].xyzz, TEMP[2].xyzz >409: MOV TEMP[1].z, TEMP[0].xxxx >410: MOV OUT[6], IN[5] >411: MOV OUT[5], TEMP[1] >412: MOV OUT[4], TEMP[4] >413: MOV OUT[3], TEMP[8] >414: MOV OUT[2], TEMP[6] >415: MOV OUT[1], TEMP[5] >416: MOV OUT[0], TEMP[3] >417: END >radeonsi: Compiling shader 339 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > 
%22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 432) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 436) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 440) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 444) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 448) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 452) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 456) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 460) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 464) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 468) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 472) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 476) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 480) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 484) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 488) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 492) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 496) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 500) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 504) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 512) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 516) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 520) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 528) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 532) > %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 536) > %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 576) > %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 580) > %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 584) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %13) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = extractelement <4 x float> %56, i32 2 > %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 > %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %14) > %63 = extractelement <4 x float> %62, i32 0 > %64 = extractelement <4 x float> %62, i32 1 > %65 = extractelement <4 x float> %62, i32 2 > %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 > %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %15) > %69 = extractelement <4 x float> %68, i32 0 > %70 = extractelement <4 x float> %68, i32 1 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x 
i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %17) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %18) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = extractelement <4 x float> %85, i32 3 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %19) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 > %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %20) > %99 = extractelement <4 x float> %98, i32 0 > %100 = extractelement <4 x float> %98, i32 1 > %101 = extractelement <4 x float> %98, i32 2 > %102 = fmul float %101, 0x406FE01000000000 > %103 = fmul float %100, 0x406FE01000000000 > %104 = fmul float %99, 0x406FE01000000000 > %105 = fptosi float %102 to i32 > %106 = fptosi float %103 to i32 > %107 = fptosi float %104 to i32 > %108 = shl i32 %105, 1 > %109 = or i32 %108, 1 > %110 = shl i32 %106, 1 > %111 = or i32 %110, 1 > %112 = shl i32 %107, 1 > %113 = or i32 %112, 1 > %114 = shl i32 %105, 5 > %115 = or i32 %114, 4 > %116 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %115) > %117 = fmul float %93, %116 > %118 = shl i32 %106, 5 > %119 = or i32 %118, 4 > %120 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %119) > %121 = fmul float %94, %120 > %122 = shl i32 %109, 4 > %123 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %122) > %124 = shl i32 %109, 4 > %125 = or i32 %124, 12 > %126 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %125) > %127 = fmul float %123, %126 > %128 = shl i32 %109, 4 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %129) > %131 = shl i32 %109, 4 > %132 = or i32 %131, 8 > %133 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %132) > %134 = fsub float -0.000000e+00, %127 > %135 = call float @llvm.fma.f32(float %130, float %133, float %134) > %136 = shl i32 %109, 4 > %137 = or i32 %136, 4 > %138 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %137) > %139 = shl i32 %109, 4 > %140 = or i32 %139, 8 > %141 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %140) > %142 = call float @llvm.fma.f32(float %138, float %141, float %127) > %143 = fmul float %142, %93 > %144 = fmul float %135, %93 > %145 = fmul float %144, 2.000000e+00 > %146 = shl i32 %111, 4 > %147 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %146) > %148 = shl i32 %111, 4 > %149 = or i32 %148, 12 > %150 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %149) > %151 = fmul float %147, %150 > %152 = shl i32 %111, 4 > %153 = or i32 %152, 4 > %154 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %153) > %155 = shl i32 %111, 4 > %156 = or i32 
%155, 8 > %157 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %156) > %158 = fsub float -0.000000e+00, %151 > %159 = call float @llvm.fma.f32(float %154, float %157, float %158) > %160 = shl i32 %111, 4 > %161 = or i32 %160, 4 > %162 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %161) > %163 = shl i32 %111, 4 > %164 = or i32 %163, 8 > %165 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %164) > %166 = call float @llvm.fma.f32(float %162, float %165, float %151) > %167 = fmul float %166, %94 > %168 = fmul float %167, 2.000000e+00 > %169 = fmul float %159, %94 > %170 = fmul float %169, 2.000000e+00 > %171 = shl i32 %109, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %172) > %174 = shl i32 %109, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %175) > %177 = shl i32 %109, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %177) > %179 = shl i32 %109, 4 > %180 = or i32 %179, 12 > %181 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %180) > %182 = fmul float %176, %181 > %183 = fmul float %176, %178 > %184 = fmul float %173, %181 > %185 = shl i32 %109, 4 > %186 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %185) > %187 = shl i32 %109, 4 > %188 = or i32 %187, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %188) > %190 = call float @llvm.fma.f32(float %186, float %189, float %182) > %191 = fmul float %190, %93 > %192 = fmul float %191, 2.000000e+00 > %193 = shl i32 %109, 4 > %194 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %193) > %195 = shl i32 %109, 4 > %196 = or i32 %195, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %196) > %198 = shl i32 %109, 4 > %199 = or i32 %198, 8 > %200 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %199) > %201 = shl i32 %109, 4 > %202 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %201) > %203 = shl i32 %109, 4 > %204 = or i32 %203, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %204) > %206 = shl i32 %109, 4 > %207 = or i32 %206, 8 > %208 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %207) > %209 = fmul float %194, %202 > %210 = fmul float %197, %205 > %211 = fmul float %200, %208 > %212 = fadd float %211, %210 > %213 = fadd float %211, %209 > %214 = fadd float %210, %209 > %215 = fsub float -0.000000e+00, %212 > %216 = call float @llvm.fma.f32(float %215, float 2.000000e+00, float 1.000000e+00) > %217 = fsub float -0.000000e+00, %213 > %218 = call float @llvm.fma.f32(float %217, float 2.000000e+00, float 1.000000e+00) > %219 = fsub float -0.000000e+00, %214 > %220 = call float @llvm.fma.f32(float %219, float 2.000000e+00, float 1.000000e+00) > %221 = fmul float %93, %218 > %222 = shl i32 %111, 4 > %223 = or i32 %222, 4 > %224 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %223) > %225 = shl i32 %111, 4 > %226 = or i32 %225, 8 > %227 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %226) > %228 = shl i32 %111, 4 > %229 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %228) > %230 = shl i32 %111, 4 > %231 = or i32 %230, 12 > %232 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %231) > %233 = fmul float %227, %232 > %234 = fmul float %227, %229 > %235 = fmul float %224, %232 > %236 = shl i32 %111, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %236) > %238 = shl i32 %111, 4 > %239 = or i32 %238, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %239) > %241 = call float @llvm.fma.f32(float %237, float %240, float %233) > %242 = fmul float %241, %94 > %243 
= fmul float %242, 2.000000e+00 > %244 = shl i32 %111, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %244) > %246 = shl i32 %111, 4 > %247 = or i32 %246, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %247) > %249 = shl i32 %111, 4 > %250 = or i32 %249, 8 > %251 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %250) > %252 = shl i32 %111, 4 > %253 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %252) > %254 = shl i32 %111, 4 > %255 = or i32 %254, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %255) > %257 = shl i32 %111, 4 > %258 = or i32 %257, 8 > %259 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %258) > %260 = fmul float %245, %253 > %261 = fmul float %248, %256 > %262 = fmul float %251, %259 > %263 = fadd float %262, %261 > %264 = fadd float %262, %260 > %265 = fadd float %261, %260 > %266 = fsub float -0.000000e+00, %263 > %267 = call float @llvm.fma.f32(float %266, float 2.000000e+00, float 1.000000e+00) > %268 = fsub float -0.000000e+00, %264 > %269 = call float @llvm.fma.f32(float %268, float 2.000000e+00, float 1.000000e+00) > %270 = fsub float -0.000000e+00, %265 > %271 = call float @llvm.fma.f32(float %270, float 2.000000e+00, float 1.000000e+00) > %272 = fmul float %94, %269 > %273 = fadd float %192, %243 > %274 = fadd float %221, %272 > %275 = fadd float %145, %170 > %276 = fadd float %117, %121 > %277 = shl i32 %107, 5 > %278 = or i32 %277, 4 > %279 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %278) > %280 = fmul float %95, %279 > %281 = shl i32 %113, 4 > %282 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %281) > %283 = shl i32 %113, 4 > %284 = or i32 %283, 12 > %285 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %284) > %286 = fmul float %282, %285 > %287 = shl i32 %113, 4 > %288 = or i32 %287, 4 > %289 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %288) > %290 = shl i32 %113, 4 > %291 = or i32 %290, 8 > %292 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %291) > %293 = fsub float -0.000000e+00, %286 > %294 = call float @llvm.fma.f32(float %289, float %292, float %293) > %295 = shl i32 %113, 4 > %296 = or i32 %295, 4 > %297 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %296) > %298 = shl i32 %113, 4 > %299 = or i32 %298, 8 > %300 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %299) > %301 = call float @llvm.fma.f32(float %297, float %300, float %286) > %302 = fmul float %301, %95 > %303 = fmul float %302, 2.000000e+00 > %304 = fmul float %294, %95 > %305 = fmul float %304, 2.000000e+00 > %306 = shl i32 %113, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %307) > %309 = shl i32 %113, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %310) > %312 = shl i32 %113, 4 > %313 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %312) > %314 = shl i32 %113, 4 > %315 = or i32 %314, 12 > %316 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %315) > %317 = fmul float %311, %316 > %318 = fmul float %311, %313 > %319 = fmul float %308, %316 > %320 = shl i32 %113, 4 > %321 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %320) > %322 = shl i32 %113, 4 > %323 = or i32 %322, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %323) > %325 = call float @llvm.fma.f32(float %321, float %324, float %317) > %326 = fmul float %325, %95 > %327 = fmul float %326, 2.000000e+00 > %328 = shl i32 %113, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %328) > %330 = shl i32 %113, 4 > %331 = or i32 %330, 4 > %332 
= call float @llvm.SI.load.const(<16 x i8> %53, i32 %331) > %333 = shl i32 %113, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %334) > %336 = shl i32 %113, 4 > %337 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %336) > %338 = shl i32 %113, 4 > %339 = or i32 %338, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %339) > %341 = shl i32 %113, 4 > %342 = or i32 %341, 8 > %343 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %342) > %344 = fmul float %329, %337 > %345 = fmul float %332, %340 > %346 = fmul float %335, %343 > %347 = fadd float %346, %345 > %348 = fadd float %346, %344 > %349 = fadd float %345, %344 > %350 = fsub float -0.000000e+00, %347 > %351 = call float @llvm.fma.f32(float %350, float 2.000000e+00, float 1.000000e+00) > %352 = fsub float -0.000000e+00, %348 > %353 = call float @llvm.fma.f32(float %352, float 2.000000e+00, float 1.000000e+00) > %354 = fsub float -0.000000e+00, %349 > %355 = call float @llvm.fma.f32(float %354, float 2.000000e+00, float 1.000000e+00) > %356 = fmul float %95, %353 > %357 = fadd float %273, %327 > %358 = fadd float %274, %356 > %359 = fadd float %275, %305 > %360 = fadd float %276, %280 > %361 = fmul float %357, %57 > %362 = fmul float %358, %58 > %363 = fadd float %361, %362 > %364 = fmul float %359, %59 > %365 = fadd float %363, %364 > %366 = fadd float %365, %360 > %367 = shl i32 %109, 4 > %368 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %367) > %369 = shl i32 %109, 4 > %370 = or i32 %369, 8 > %371 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %370) > %372 = fsub float -0.000000e+00, %184 > %373 = call float @llvm.fma.f32(float %368, float %371, float %372) > %374 = fmul float %373, %93 > %375 = fmul float %374, 2.000000e+00 > %376 = fmul float %143, 2.000000e+00 > %377 = shl i32 %111, 4 > %378 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %377) > %379 = shl i32 %111, 4 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %380) > %382 = fsub float -0.000000e+00, %235 > %383 = call float @llvm.fma.f32(float %378, float %381, float %382) > %384 = fmul float %383, %94 > %385 = fmul float %384, 2.000000e+00 > %386 = fmul float %93, %220 > %387 = fmul float %93, %216 > %388 = fmul float %94, %271 > %389 = fmul float %94, %267 > %390 = shl i32 %105, 5 > %391 = or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %391) > %393 = fmul float %93, %392 > %394 = shl i32 %106, 5 > %395 = or i32 %394, 8 > %396 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %395) > %397 = fmul float %94, %396 > %398 = fadd float %385, %375 > %399 = fadd float %168, %376 > %400 = fadd float %388, %386 > %401 = fadd float %397, %393 > %402 = shl i32 %113, 4 > %403 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %402) > %404 = shl i32 %113, 4 > %405 = or i32 %404, 8 > %406 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %405) > %407 = fsub float -0.000000e+00, %319 > %408 = call float @llvm.fma.f32(float %403, float %406, float %407) > %409 = fmul float %408, %95 > %410 = fmul float %409, 2.000000e+00 > %411 = fmul float %95, %355 > %412 = fmul float %95, %351 > %413 = shl i32 %107, 5 > %414 = or i32 %413, 8 > %415 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %414) > %416 = fmul float %95, %415 > %417 = fadd float %398, %410 > %418 = fadd float %399, %303 > %419 = fadd float %400, %411 > %420 = fadd float %401, %416 > %421 = fmul float %417, %57 > %422 = fmul float %418, %58 > %423 = fadd float %421, %422 > %424 = fmul float 
%419, %59 > %425 = fadd float %423, %424 > %426 = fadd float %425, %420 > %427 = shl i32 %105, 5 > %428 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %427) > %429 = fmul float %93, %428 > %430 = shl i32 %106, 5 > %431 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %430) > %432 = fmul float %94, %431 > %433 = shl i32 %107, 5 > %434 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %433) > %435 = fmul float %95, %434 > %436 = shl i32 %109, 4 > %437 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %436) > %438 = shl i32 %109, 4 > %439 = or i32 %438, 4 > %440 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %439) > %441 = fsub float -0.000000e+00, %182 > %442 = call float @llvm.fma.f32(float %437, float %440, float %441) > %443 = fadd float %184, %183 > %444 = fmul float %442, %93 > %445 = fmul float %443, %93 > %446 = fmul float %444, 2.000000e+00 > %447 = fmul float %445, 2.000000e+00 > %448 = shl i32 %111, 4 > %449 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %448) > %450 = shl i32 %111, 4 > %451 = or i32 %450, 4 > %452 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %451) > %453 = fsub float -0.000000e+00, %233 > %454 = call float @llvm.fma.f32(float %449, float %452, float %453) > %455 = shl i32 %113, 4 > %456 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %455) > %457 = shl i32 %113, 4 > %458 = or i32 %457, 4 > %459 = call float @llvm.SI.load.const(<16 x i8> %53, i32 %458) > %460 = fsub float -0.000000e+00, %317 > %461 = call float @llvm.fma.f32(float %456, float %459, float %460) > %462 = fadd float %319, %318 > %463 = fmul float %454, %94 > %464 = fmul float %461, %95 > %465 = fmul float %462, %95 > %466 = fmul float %464, 2.000000e+00 > %467 = fmul float %465, 2.000000e+00 > %468 = fadd float %235, %234 > %469 = fmul float %468, %94 > %470 = fmul float %463, 2.000000e+00 > %471 = fmul float %469, 2.000000e+00 > %472 = fadd float %387, %389 > %473 = fadd float %446, %470 > %474 = fadd float %447, %471 > %475 = fadd float %429, %432 > %476 = fadd float %412, %472 > %477 = fadd float %466, %473 > %478 = fadd float %467, %474 > %479 = fadd float %435, %475 > %480 = fmul float %476, %57 > %481 = fmul float %477, %58 > %482 = fadd float %480, %481 > %483 = fmul float %478, %59 > %484 = fadd float %482, %483 > %485 = fadd float %484, %479 > %486 = fmul float %32, %485 > %487 = fmul float %33, %366 > %488 = fadd float %486, %487 > %489 = fmul float %34, %426 > %490 = fadd float %488, %489 > %491 = fadd float %490, %35 > %492 = fadd float %491, %23 > %493 = fmul float %24, %485 > %494 = fmul float %25, %366 > %495 = fadd float %493, %494 > %496 = fmul float %26, %426 > %497 = fadd float %495, %496 > %498 = fadd float %497, %27 > %499 = fmul float %28, %485 > %500 = fmul float %29, %366 > %501 = fadd float %499, %500 > %502 = fmul float %30, %426 > %503 = fadd float %501, %502 > %504 = fadd float %503, %31 > %505 = fmul float %36, %485 > %506 = fmul float %37, %366 > %507 = fadd float %505, %506 > %508 = fmul float %38, %426 > %509 = fadd float %507, %508 > %510 = fadd float %509, %39 > %511 = fsub float %49, %485 > %512 = fsub float %50, %366 > %513 = fsub float %51, %426 > %514 = fmul float %40, %511 > %515 = fmul float %41, %512 > %516 = fadd float %515, %514 > %517 = fmul float %42, %513 > %518 = fadd float %516, %517 > %519 = fmul float %43, %511 > %520 = fmul float %44, %512 > %521 = fadd float %520, %519 > %522 = fmul float %45, %513 > %523 = fadd float %521, %522 > %524 = fmul float %46, %511 > %525 = fmul float %47, %512 > %526 = fadd float %525, %524 
> %527 = fmul float %48, %513 > %528 = fadd float %526, %527 > %529 = fmul float %357, %74 > %530 = fmul float %358, %75 > %531 = fadd float %530, %529 > %532 = fmul float %359, %76 > %533 = fadd float %531, %532 > %534 = fmul float %417, %74 > %535 = fmul float %418, %75 > %536 = fadd float %535, %534 > %537 = fmul float %419, %76 > %538 = fadd float %536, %537 > %539 = fmul float %476, %74 > %540 = fmul float %477, %75 > %541 = fadd float %540, %539 > %542 = fmul float %478, %76 > %543 = fadd float %541, %542 > %544 = fmul float %543, %543 > %545 = fmul float %533, %533 > %546 = fadd float %545, %544 > %547 = fmul float %538, %538 > %548 = fadd float %546, %547 > %549 = call float @llvm.AMDGPU.rsq.clamped.f32(float %548) > %550 = fmul float %549, %543 > %551 = fmul float %549, %533 > %552 = fmul float %549, %538 > %553 = fmul float %40, %550 > %554 = fmul float %41, %551 > %555 = fadd float %554, %553 > %556 = fmul float %42, %552 > %557 = fadd float %555, %556 > %558 = fmul float %357, %80 > %559 = fmul float %358, %81 > %560 = fadd float %559, %558 > %561 = fmul float %359, %82 > %562 = fadd float %560, %561 > %563 = fmul float %357, %63 > %564 = fmul float %358, %64 > %565 = fadd float %564, %563 > %566 = fmul float %359, %65 > %567 = fadd float %565, %566 > %568 = fmul float %417, %80 > %569 = fmul float %418, %81 > %570 = fadd float %569, %568 > %571 = fmul float %419, %82 > %572 = fadd float %570, %571 > %573 = fmul float %417, %63 > %574 = fmul float %418, %64 > %575 = fadd float %574, %573 > %576 = fmul float %419, %65 > %577 = fadd float %575, %576 > %578 = fmul float %476, %80 > %579 = fmul float %477, %81 > %580 = fadd float %579, %578 > %581 = fmul float %478, %82 > %582 = fadd float %580, %581 > %583 = fmul float %476, %63 > %584 = fmul float %477, %64 > %585 = fadd float %584, %583 > %586 = fmul float %478, %65 > %587 = fadd float %585, %586 > %588 = fmul float %582, %582 > %589 = fmul float %562, %562 > %590 = fadd float %589, %588 > %591 = fmul float %572, %572 > %592 = fadd float %590, %591 > %593 = call float @llvm.AMDGPU.rsq.clamped.f32(float %592) > %594 = fmul float %593, %582 > %595 = fmul float %593, %562 > %596 = fmul float %593, %572 > %597 = fmul float %40, %594 > %598 = fmul float %41, %595 > %599 = fadd float %598, %597 > %600 = fmul float %42, %596 > %601 = fadd float %599, %600 > %602 = fmul float %587, %587 > %603 = fmul float %567, %567 > %604 = fadd float %603, %602 > %605 = fmul float %577, %577 > %606 = fadd float %604, %605 > %607 = call float @llvm.AMDGPU.rsq.clamped.f32(float %606) > %608 = fmul float %607, %587 > %609 = fmul float %607, %567 > %610 = fmul float %607, %577 > %611 = fmul float %40, %608 > %612 = fmul float %41, %609 > %613 = fadd float %612, %611 > %614 = fmul float %42, %610 > %615 = fadd float %613, %614 > %616 = fmul float %43, %550 > %617 = fmul float %44, %551 > %618 = fadd float %617, %616 > %619 = fmul float %45, %552 > %620 = fadd float %618, %619 > %621 = fmul float %46, %550 > %622 = fmul float %47, %551 > %623 = fadd float %622, %621 > %624 = fmul float %48, %552 > %625 = fadd float %623, %624 > %626 = fmul float %43, %594 > %627 = fmul float %44, %595 > %628 = fadd float %627, %626 > %629 = fmul float %45, %596 > %630 = fadd float %628, %629 > %631 = fmul float %46, %594 > %632 = fmul float %47, %595 > %633 = fadd float %632, %631 > %634 = fmul float %48, %596 > %635 = fadd float %633, %634 > %636 = fmul float %43, %608 > %637 = fmul float %44, %609 > %638 = fadd float %637, %636 > %639 = fmul float %45, %610 > %640 = fadd 
float %638, %639 > %641 = fmul float %46, %608 > %642 = fmul float %47, %609 > %643 = fadd float %642, %641 > %644 = fmul float %48, %610 > %645 = fadd float %643, %644 > %646 = bitcast i32 %11 to float > %647 = insertvalue <{ float, float, float }> undef, float %646, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %69, float %70, float %59, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %518, float %523, float %528, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %557, float %601, float %615, float %150) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %620, float %630, float %640, float %360) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %625, float %635, float %645, float %510) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %86, float %87, float %88, float %89) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %498, float %504, float %492, float %510) > ret <{ float, float, float }> %647 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL CONST[1][0..35] >DCL TEMP[0..8], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 416, 272, 336} >IMM[2] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, 0.1250} >IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} >IMM[4] UINT32 {288, 560, 240, 304} >IMM[5] UINT32 {256, 320, 544, 0} >IMM[6] INT32 {0, 0, 0, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[2].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[3].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[4].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz > 17: RSQ TEMP[2].x, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[1].xyzz > 19: DP3 
TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz > 20: ABS TEMP[2].x, TEMP[0].xxxx > 21: ADD TEMP[2].x, TEMP[2].xxxx, -CONST[1][26].xxxx > 22: ADD TEMP[3].x, -CONST[1][26].xxxx, CONST[1][26].yyyy > 23: FSNE TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww > 24: UIF TEMP[4].xxxx :0 > 25: RCP TEMP[3].x, TEMP[3].xxxx > 26: ELSE :0 > 27: MOV TEMP[3].x, IMM[2].xxxx > 28: ENDIF > 29: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx > 30: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 31: FMA TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz > 32: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx > 33: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx > 34: MAX TEMP[2].x, TEMP[2].xxxx, CONST[1][17].wwww > 35: LG2 TEMP[2].x, TEMP[2].xxxx > 36: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][26].zzzz > 37: EX2 TEMP[2].x, TEMP[2].xxxx > 38: MOV TEMP[3].xy, IN[0].xyyy > 39: TEX TEMP[3], TEMP[3], SAMP[1], 2D > 40: ABS TEMP[4].x, TEMP[0].xxxx > 41: ADD TEMP[0].x, TEMP[4].xxxx, TEMP[3].wwww > 42: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy > 43: FSNE TEMP[4].x, TEMP[3].wwww, IMM[0].wwww > 44: UIF TEMP[4].xxxx :0 > 45: RCP TEMP[4].x, TEMP[3].wwww > 46: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx > 47: ELSE :0 > 48: SSG TEMP[5].x, TEMP[0].xxxx > 49: MUL TEMP[4].x, IMM[2].xxxx, TEMP[5].xxxx > 50: ENDIF > 51: MOV_SAT TEMP[0].x, TEMP[4].xxxx > 52: LG2 TEMP[4].x, TEMP[0].xxxx > 53: MOV TEMP[5].xy, IN[0].xyyy > 54: TEX TEMP[5].xyz, TEMP[5], SAMP[2], 2D > 55: MUL TEMP[6].x, TEMP[5].zzzz, TEMP[5].zzzz > 56: MUL TEMP[7].x, TEMP[6].xxxx, CONST[1][21].xxxx > 57: FMA TEMP[6].x, TEMP[6].xxxx, CONST[1][21].xxxx, IMM[0].xxxx > 58: MUL TEMP[6].x, TEMP[6].xxxx, IMM[2].wwww > 59: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[7].xxxx > 60: EX2 TEMP[4].x, TEMP[0].xxxx > 61: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[6].xxxx > 62: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[0].xxxx > 63: MAX TEMP[4].x, TEMP[0].xxxx, IMM[0].wwww > 64: ADD TEMP[6].x, -TEMP[4].xxxx, IMM[0].zzzz > 65: MUL TEMP[4].xyz, TEMP[4].xxxx, CONST[1][17].xyzz > 66: MUL TEMP[4].xyz, TEMP[4].xyzz, CONST[1][21].yyyy > 67: DP3 TEMP[7].x, TEMP[3].xyzz, IMM[3].xyzz > 68: ADD TEMP[7].xyz, -TEMP[3].xyzz, TEMP[7].xxxx > 69: FMA TEMP[8].xyz, CONST[1][18].zzzz, TEMP[7].xyzz, TEMP[3].xyzz > 70: MUL TEMP[1].xyz, TEMP[8].xyzz, CONST[1][35].xyzz > 71: MUL TEMP[7].xyz, TEMP[1].xyzz, CONST[1][15].yzww > 72: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][19].xxxx > 73: MUL TEMP[7].xyz, TEMP[7].xyzz, CONST[1][18].xxxx > 74: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz > 75: FMA TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].yyyy, TEMP[6].xyzz > 76: MUL TEMP[3].xyz, CONST[1][16].xyzz, CONST[1][19].xxxx > 77: FMA TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, -TEMP[4].xyzz > 78: ADD TEMP[3].xy, CONST[1][20].xwww, IMM[0].zzzz > 79: FMA TEMP[5].x, TEMP[3].xxxx, CONST[1][19].yyyy, IN[5].wwww > 80: FMA TEMP[6].x, TEMP[3].yyyy, CONST[1][20].zzzz, IN[5].wwww > 81: ADD TEMP[3].x, TEMP[6].xxxx, IMM[0].yyyy > 82: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy > 83: FSEQ TEMP[6].xy, CONST[1][20].xwww, IMM[0].wwww > 84: RCP TEMP[7].x, CONST[1][20].xxxx > 85: RCP TEMP[7].y, CONST[1][20].wwww > 86: UCMP TEMP[6].xy, TEMP[6].xyyy, IMM[2].xxxx, TEMP[7].xyyy > 87: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx > 88: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 89: MUL TEMP[3].x, TEMP[6].yyyy, TEMP[3].xxxx > 90: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 91: FMA TEMP[6].x, TEMP[5].xxxx, IMM[2].yyyy, IMM[2].zzzz > 92: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 93: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx > 94: LG2 TEMP[5].x, TEMP[5].xxxx > 95: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][19].wwww > 96: EX2 TEMP[5].x, TEMP[5].xxxx > 97: MIN TEMP[5].x, 
TEMP[5].xxxx, IMM[0].zzzz > 98: FMA TEMP[4].xyz, TEMP[5].xxxx, TEMP[1].xyzz, TEMP[4].xyzz > 99: DP3 TEMP[1].x, TEMP[4].xyzz, CONST[1][34].xyzz >100: FMA TEMP[2].x, TEMP[1].xxxx, CONST[1][18].wwww, TEMP[2].xxxx >101: ADD TEMP[1].x, -TEMP[2].xxxx, CONST[1][19].zzzz >102: FMA TEMP[2].x, TEMP[5].xxxx, TEMP[1].xxxx, TEMP[2].xxxx >103: FMA TEMP[5].x, TEMP[3].xxxx, IMM[2].yyyy, IMM[2].zzzz >104: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >105: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[5].xxxx >106: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][20].yyyy >107: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][35].wwww >108: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx >109: MOV TEMP[2].w, TEMP[2].xxxx >110: MUL TEMP[3].x, CONST[1][21].wwww, CONST[1][21].wwww >111: MIN TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz >112: ADD TEMP[1].x, CONST[1][21].zzzz, IMM[0].yyyy >113: FMA TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx, IMM[0].zzzz >114: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[4].xyzz >115: MOV TEMP[1].x, IMM[6].xxxx >116: MOV TEMP[1].w, IMM[1].xxxx >117: TXF TEMP[1].x, TEMP[1], SAMP[3], BUFFER >118: MUL TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz >119: MOV OUT[0], TEMP[2] >120: END >radeonsi: Compiling shader 340 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) > 
%50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 544) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 548) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 552) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 560) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 564) > %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 568) > %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 572) > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 3 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %70 = load <8 x i32>, <8 x i32> addrspace(2)* %69, align 32, !tbaa !0 > %71 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %72 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %71, i64 0, i64 7 > %73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0 > %74 = extractelement <8 x i32> %70, i32 7 > %75 = extractelement <4 x i32> %73, i32 0 > %76 = and i32 %75, %74 > %77 = insertelement <4 x i32> %73, i32 %76, i32 0 > %78 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !tbaa !0 > %80 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %81 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %80, i64 0, i64 11 > %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 > %83 = extractelement <8 x i32> %79, i32 7 > %84 = extractelement <4 x i32> %82, i32 0 > %85 = and i32 %84, %83 > %86 = insertelement <4 x i32> %82, i32 %85, i32 0 > %87 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %88 = bitcast <8 x i32> addrspace(2)* %87 to <2 x i128> addrspace(2)* > %89 = load <2 x i128>, <2 x i128> addrspace(2)* %88, align 32, !tbaa !0 > %90 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %102 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %103 = call float 
@llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %104 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %105 = bitcast float %90 to i32 > %106 = bitcast float %91 to i32 > %107 = insertelement <2 x i32> undef, i32 %105, i32 0 > %108 = insertelement <2 x i32> %107, i32 %106, i32 1 > %109 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %108, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %110 = extractelement <4 x float> %109, i32 1 > %111 = extractelement <4 x float> %109, i32 3 > %112 = call float @llvm.fma.f32(float %110, float 2.000000e+00, float -1.000000e+00) > %113 = call float @llvm.fma.f32(float %111, float 2.000000e+00, float -1.000000e+00) > %114 = fsub float -0.000000e+00, %112 > %115 = call float @llvm.fma.f32(float %114, float %112, float 1.000000e+00) > %116 = fsub float -0.000000e+00, %113 > %117 = call float @llvm.fma.f32(float %116, float %113, float %115) > %118 = call float @llvm.sqrt.f32(float %117) > %119 = fmul float %95, %112 > %120 = fmul float %96, %113 > %121 = fadd float %120, %119 > %122 = fmul float %97, %118 > %123 = fadd float %121, %122 > %124 = fmul float %98, %112 > %125 = fmul float %99, %113 > %126 = fadd float %125, %124 > %127 = fmul float %100, %118 > %128 = fadd float %126, %127 > %129 = fmul float %101, %112 > %130 = fmul float %102, %113 > %131 = fadd float %130, %129 > %132 = fmul float %103, %118 > %133 = fadd float %131, %132 > %134 = fmul float %123, %123 > %135 = fmul float %128, %128 > %136 = fadd float %135, %134 > %137 = fmul float %133, %133 > %138 = fadd float %136, %137 > %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) > %140 = fmul float %139, %123 > %141 = fmul float %139, %128 > %142 = fmul float %139, %133 > %143 = fmul float %92, %92 > %144 = fmul float %93, %93 > %145 = fadd float %144, %143 > %146 = fmul float %94, %94 > %147 = fadd float %145, %146 > %148 = call float @llvm.AMDGPU.rsq.clamped.f32(float %147) > %149 = fmul float %148, %92 > %150 = fmul float %148, %93 > %151 = fmul float %148, %94 > %152 = fmul float %140, %149 > %153 = fmul float %141, %150 > %154 = fadd float %153, %152 > %155 = fmul float %142, %151 > %156 = fadd float %154, %155 > %157 = call float @llvm.fabs.f32(float %156) > %158 = fsub float %157, %50 > %159 = fsub float %51, %50 > %160 = fcmp une float %159, 0.000000e+00 > %161 = fdiv float 1.000000e+00, %159 > %temp12.0 = select i1 %160, float %161, float 0x4600000000000000 > %162 = fmul float %temp12.0, %158 > %163 = call float @llvm.AMDGPU.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) > %164 = call float @llvm.fma.f32(float %163, float -2.000000e+00, float 3.000000e+00) > %165 = fmul float %163, %163 > %166 = fmul float %165, %164 > %167 = call float @llvm.maxnum.f32(float %166, float %34) > %168 = call float @llvm.log2.f32(float %167) > %169 = fmul float %168, %52 > %170 = call float @llvm.exp2.f32(float %169) > %171 = bitcast float %90 to i32 > %172 = bitcast float %91 to i32 > %173 = insertelement <2 x i32> undef, i32 %171, i32 0 > %174 = insertelement <2 x i32> %173, i32 %172, i32 1 > %175 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %174, <8 x i32> %70, <4 x i32> %77, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %176 = extractelement <4 x float> %175, i32 0 > %177 = extractelement <4 x float> %175, i32 1 > %178 = extractelement <4 x float> %175, i32 2 > %179 = extractelement <4 x float> %175, i32 3 > %180 = call float @llvm.fabs.f32(float %156) > %181 = fadd float %180, 
%179 > %182 = fadd float %181, -1.000000e+00 > %183 = fcmp une float %179, 0.000000e+00 > br i1 %183, label %IF37, label %ELSE38 > >IF37: ; preds = %main_body > %184 = fdiv float 1.000000e+00, %179 > %185 = fmul float %182, %184 > br label %ENDIF36 > >ELSE38: ; preds = %main_body > %186 = fcmp ogt float %182, 0.000000e+00 > %187 = select i1 %186, float 1.000000e+00, float %182 > %188 = fcmp oge float %187, 0.000000e+00 > %.op = fmul float %187, 0x4600000000000000 > %189 = select i1 %188, float %.op, float 0xC600000000000000 > br label %ENDIF36 > >ENDIF36: ; preds = %ELSE38, %IF37 > %temp16.0 = phi float [ %185, %IF37 ], [ %189, %ELSE38 ] > %190 = call float @llvm.AMDGPU.clamp.(float %temp16.0, float 0.000000e+00, float 1.000000e+00) > %191 = call float @llvm.log2.f32(float %190) > %192 = bitcast float %90 to i32 > %193 = bitcast float %91 to i32 > %194 = insertelement <2 x i32> undef, i32 %192, i32 0 > %195 = insertelement <2 x i32> %194, i32 %193, i32 1 > %196 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %195, <8 x i32> %79, <4 x i32> %86, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %197 = extractelement <4 x float> %196, i32 0 > %198 = extractelement <4 x float> %196, i32 1 > %199 = extractelement <4 x float> %196, i32 2 > %200 = fmul float %199, %199 > %201 = fmul float %200, %46 > %202 = call float @llvm.fma.f32(float %200, float %46, float 2.000000e+00) > %203 = fmul float %202, 1.250000e-01 > %204 = fmul float %191, %201 > %205 = call float @llvm.exp2.f32(float %204) > %206 = fmul float %205, %203 > %207 = fmul float %197, %206 > %208 = call float @llvm.maxnum.f32(float %207, float 0.000000e+00) > %209 = fsub float 1.000000e+00, %208 > %210 = fmul float %208, %31 > %211 = fmul float %208, %32 > %212 = fmul float %208, %33 > %213 = fmul float %210, %47 > %214 = fmul float %211, %47 > %215 = fmul float %212, %47 > %216 = fmul float %176, 0x3FD3333340000000 > %217 = fmul float %177, 0x3FE2E147A0000000 > %218 = fadd float %217, %216 > %219 = fmul float %178, 0x3FBC28F5C0000000 > %220 = fadd float %218, %219 > %221 = fsub float %220, %176 > %222 = fsub float %220, %177 > %223 = fsub float %220, %178 > %224 = call float @llvm.fma.f32(float %36, float %221, float %176) > %225 = call float @llvm.fma.f32(float %36, float %222, float %177) > %226 = call float @llvm.fma.f32(float %36, float %223, float %178) > %227 = fmul float %224, %56 > %228 = fmul float %225, %57 > %229 = fmul float %226, %58 > %230 = fmul float %227, %25 > %231 = fmul float %228, %26 > %232 = fmul float %229, %27 > %233 = fmul float %227, %38 > %234 = fmul float %228, %38 > %235 = fmul float %229, %38 > %236 = fmul float %230, %35 > %237 = fmul float %231, %35 > %238 = fmul float %232, %35 > %239 = fmul float %209, %236 > %240 = fmul float %209, %237 > %241 = fmul float %209, %238 > %242 = call float @llvm.fma.f32(float %213, float %198, float %239) > %243 = call float @llvm.fma.f32(float %214, float %198, float %240) > %244 = call float @llvm.fma.f32(float %215, float %198, float %241) > %245 = fmul float %28, %38 > %246 = fmul float %29, %38 > %247 = fmul float %30, %38 > %248 = fsub float -0.000000e+00, %242 > %249 = call float @llvm.fma.f32(float %233, float %245, float %248) > %250 = fsub float -0.000000e+00, %243 > %251 = call float @llvm.fma.f32(float %234, float %246, float %250) > %252 = fsub float -0.000000e+00, %244 > %253 = call float @llvm.fma.f32(float %235, float %247, float %252) > %254 = fadd float %42, 1.000000e+00 > %255 = fadd float %45, 1.000000e+00 > %256 = call float 
@llvm.fma.f32(float %254, float %39, float %104) > %257 = call float @llvm.fma.f32(float %255, float %44, float %104) > %258 = fadd float %257, -1.000000e+00 > %259 = fadd float %256, -1.000000e+00 > %260 = fcmp oeq float %42, 0.000000e+00 > %261 = fcmp oeq float %45, 0.000000e+00 > %262 = fdiv float 1.000000e+00, %42 > %263 = fdiv float 1.000000e+00, %45 > %264 = select i1 %260, float 0x4600000000000000, float %262 > %265 = select i1 %261, float 0x4600000000000000, float %263 > %266 = fmul float %259, %264 > %267 = call float @llvm.AMDGPU.clamp.(float %266, float 0.000000e+00, float 1.000000e+00) > %268 = fmul float %265, %258 > %269 = call float @llvm.AMDGPU.clamp.(float %268, float 0.000000e+00, float 1.000000e+00) > %270 = call float @llvm.fma.f32(float %267, float -2.000000e+00, float 3.000000e+00) > %271 = fmul float %267, %267 > %272 = fmul float %271, %270 > %273 = call float @llvm.log2.f32(float %272) > %274 = fmul float %273, %41 > %275 = call float @llvm.exp2.f32(float %274) > %276 = call float @llvm.minnum.f32(float %275, float 1.000000e+00) > %277 = call float @llvm.fma.f32(float %276, float %249, float %242) > %278 = call float @llvm.fma.f32(float %276, float %251, float %243) > %279 = call float @llvm.fma.f32(float %276, float %253, float %244) > %280 = fmul float %277, %53 > %281 = fmul float %278, %54 > %282 = fadd float %281, %280 > %283 = fmul float %279, %55 > %284 = fadd float %282, %283 > %285 = call float @llvm.fma.f32(float %284, float %37, float %170) > %286 = fsub float %40, %285 > %287 = call float @llvm.fma.f32(float %276, float %286, float %285) > %288 = call float @llvm.fma.f32(float %269, float -2.000000e+00, float 3.000000e+00) > %289 = fmul float %269, %269 > %290 = fmul float %289, %288 > %291 = fmul float %290, %43 > %292 = fmul float %291, %59 > %293 = fmul float %287, %292 > %294 = fmul float %49, %49 > %295 = call float @llvm.minnum.f32(float %294, float 1.000000e+00) > %296 = fadd float %48, -1.000000e+00 > %297 = call float @llvm.fma.f32(float %295, float %296, float 1.000000e+00) > %298 = fmul float %297, %277 > %299 = fmul float %297, %278 > %300 = fmul float %297, %279 > %301 = extractelement <2 x i128> %89, i32 1 > %302 = bitcast i128 %301 to <16 x i8> > %303 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %302, i32 0, i32 0) > %304 = extractelement <4 x float> %303, i32 0 > %305 = fmul float %304, %298 > %306 = fmul float %304, %299 > %307 = fmul float %304, %300 > %308 = bitcast float %5 to i32 > %309 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %308, 10 > %310 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %309, float %305, 11 > %311 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %310, float %306, 12 > %312 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %311, float %307, 13 > %313 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %312, float %293, 14 > %314 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %313, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %314 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL CONST[1][0..37] >DCL CONST[2][0..4095] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 464} >IMM[3] UINT32 {288, 432, 448, 480} >IMM[4] UINT32 {592, 0, 0, 0} > 0: MUL TEMP[0].xyz, IN[5].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[4].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[4].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL 
ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[4].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[4].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[4].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[4].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[4].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, 
IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[4].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[4].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[4].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[4].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[4].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[4].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz 
>188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[4].zzzz >205: MUL TEMP[4].x, IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >211: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[4].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[3].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[3].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[4].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[4].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[4].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[4].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[4].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[4].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[4].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[4].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, 
IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[4].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[4].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[4].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, IN[4].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[4].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[4].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[4].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[4].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[4].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[4].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, 
IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][29], TEMP[3] >356: ADD TEMP[1].x, TEMP[1].xxxx, CONST[1][18].yyyy >357: MOV TEMP[0].z, TEMP[1].xxxx >358: DP4 TEMP[0].x, CONST[1][27], TEMP[3] >359: DP4 TEMP[1].x, CONST[1][28], TEMP[3] >360: MOV TEMP[0].y, TEMP[1].xxxx >361: DP4 TEMP[1].x, CONST[1][30], TEMP[3] >362: MOV TEMP[0].w, TEMP[1].xxxx >363: MOV TEMP[0], TEMP[0] >364: MOV OUT[0], TEMP[0] >365: END >radeonsi: Compiling shader 341 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 292) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 432) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 436) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 440) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 444) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 448) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 452) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 456) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 460) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 464) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 468) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 472) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 476) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 480) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 484) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 488) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 492) > %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 > %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %13) > %43 = extractelement <4 x float> %42, i32 0 > %44 = extractelement <4 x float> %42, i32 1 > %45 = extractelement <4 x float> %42, i32 2 > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %17) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %18) > %55 = extractelement <4 x float> %54, i32 0 > %56 = 
extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = fmul float %57, 0x406FE01000000000 > %59 = fmul float %56, 0x406FE01000000000 > %60 = fmul float %55, 0x406FE01000000000 > %61 = fptosi float %58 to i32 > %62 = fptosi float %59 to i32 > %63 = fptosi float %60 to i32 > %64 = shl i32 %61, 1 > %65 = or i32 %64, 1 > %66 = shl i32 %62, 1 > %67 = or i32 %66, 1 > %68 = shl i32 %63, 1 > %69 = or i32 %68, 1 > %70 = shl i32 %61, 5 > %71 = or i32 %70, 4 > %72 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %71) > %73 = fmul float %49, %72 > %74 = shl i32 %62, 5 > %75 = or i32 %74, 4 > %76 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %75) > %77 = fmul float %50, %76 > %78 = shl i32 %65, 4 > %79 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %78) > %80 = shl i32 %65, 4 > %81 = or i32 %80, 12 > %82 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %81) > %83 = fmul float %79, %82 > %84 = shl i32 %65, 4 > %85 = or i32 %84, 4 > %86 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %85) > %87 = shl i32 %65, 4 > %88 = or i32 %87, 8 > %89 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %88) > %90 = fsub float -0.000000e+00, %83 > %91 = call float @llvm.fma.f32(float %86, float %89, float %90) > %92 = shl i32 %65, 4 > %93 = or i32 %92, 4 > %94 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %93) > %95 = shl i32 %65, 4 > %96 = or i32 %95, 8 > %97 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %96) > %98 = call float @llvm.fma.f32(float %94, float %97, float %83) > %99 = fmul float %98, %49 > %100 = fmul float %91, %49 > %101 = fmul float %100, 2.000000e+00 > %102 = shl i32 %67, 4 > %103 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %102) > %104 = shl i32 %67, 4 > %105 = or i32 %104, 12 > %106 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %105) > %107 = fmul float %103, %106 > %108 = shl i32 %67, 4 > %109 = or i32 %108, 4 > %110 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %109) > %111 = shl i32 %67, 4 > %112 = or i32 %111, 8 > %113 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %112) > %114 = fsub float -0.000000e+00, %107 > %115 = call float @llvm.fma.f32(float %110, float %113, float %114) > %116 = shl i32 %67, 4 > %117 = or i32 %116, 4 > %118 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %117) > %119 = shl i32 %67, 4 > %120 = or i32 %119, 8 > %121 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %120) > %122 = call float @llvm.fma.f32(float %118, float %121, float %107) > %123 = fmul float %122, %50 > %124 = fmul float %123, 2.000000e+00 > %125 = fmul float %115, %50 > %126 = fmul float %125, 2.000000e+00 > %127 = shl i32 %65, 4 > %128 = or i32 %127, 4 > %129 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %128) > %130 = shl i32 %65, 4 > %131 = or i32 %130, 8 > %132 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %131) > %133 = shl i32 %65, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %133) > %135 = shl i32 %65, 4 > %136 = or i32 %135, 12 > %137 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %136) > %138 = fmul float %132, %137 > %139 = fmul float %132, %134 > %140 = fmul float %129, %137 > %141 = shl i32 %65, 4 > %142 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %141) > %143 = shl i32 %65, 4 > %144 = or i32 %143, 4 > %145 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %144) > %146 = call float @llvm.fma.f32(float %142, float %145, float %138) > %147 = fmul float %146, %49 > %148 = fmul float %147, 2.000000e+00 > %149 = shl i32 %65, 4 > %150 = call float 
@llvm.SI.load.const(<16 x i8> %39, i32 %149) > %151 = shl i32 %65, 4 > %152 = or i32 %151, 4 > %153 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %152) > %154 = shl i32 %65, 4 > %155 = or i32 %154, 8 > %156 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %155) > %157 = shl i32 %65, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %157) > %159 = shl i32 %65, 4 > %160 = or i32 %159, 4 > %161 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %160) > %162 = shl i32 %65, 4 > %163 = or i32 %162, 8 > %164 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %163) > %165 = fmul float %150, %158 > %166 = fmul float %153, %161 > %167 = fmul float %156, %164 > %168 = fadd float %167, %166 > %169 = fadd float %167, %165 > %170 = fadd float %166, %165 > %171 = fsub float -0.000000e+00, %168 > %172 = call float @llvm.fma.f32(float %171, float 2.000000e+00, float 1.000000e+00) > %173 = fsub float -0.000000e+00, %169 > %174 = call float @llvm.fma.f32(float %173, float 2.000000e+00, float 1.000000e+00) > %175 = fsub float -0.000000e+00, %170 > %176 = call float @llvm.fma.f32(float %175, float 2.000000e+00, float 1.000000e+00) > %177 = fmul float %49, %174 > %178 = shl i32 %67, 4 > %179 = or i32 %178, 4 > %180 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %179) > %181 = shl i32 %67, 4 > %182 = or i32 %181, 8 > %183 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %182) > %184 = shl i32 %67, 4 > %185 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %184) > %186 = shl i32 %67, 4 > %187 = or i32 %186, 12 > %188 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %187) > %189 = fmul float %183, %188 > %190 = fmul float %183, %185 > %191 = fmul float %180, %188 > %192 = shl i32 %67, 4 > %193 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %192) > %194 = shl i32 %67, 4 > %195 = or i32 %194, 4 > %196 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %195) > %197 = call float @llvm.fma.f32(float %193, float %196, float %189) > %198 = fmul float %197, %50 > %199 = fmul float %198, 2.000000e+00 > %200 = shl i32 %67, 4 > %201 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %200) > %202 = shl i32 %67, 4 > %203 = or i32 %202, 4 > %204 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %203) > %205 = shl i32 %67, 4 > %206 = or i32 %205, 8 > %207 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %206) > %208 = shl i32 %67, 4 > %209 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %208) > %210 = shl i32 %67, 4 > %211 = or i32 %210, 4 > %212 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %211) > %213 = shl i32 %67, 4 > %214 = or i32 %213, 8 > %215 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %214) > %216 = fmul float %201, %209 > %217 = fmul float %204, %212 > %218 = fmul float %207, %215 > %219 = fadd float %218, %217 > %220 = fadd float %218, %216 > %221 = fadd float %217, %216 > %222 = fsub float -0.000000e+00, %219 > %223 = call float @llvm.fma.f32(float %222, float 2.000000e+00, float 1.000000e+00) > %224 = fsub float -0.000000e+00, %220 > %225 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fsub float -0.000000e+00, %221 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fmul float %50, %225 > %229 = fadd float %148, %199 > %230 = fadd float %177, %228 > %231 = fadd float %101, %126 > %232 = fadd float %73, %77 > %233 = shl i32 %63, 5 > %234 = or i32 %233, 4 > %235 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %234) > %236 = fmul float %51, %235 > %237 = shl i32 %69, 
4 > %238 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %237) > %239 = shl i32 %69, 4 > %240 = or i32 %239, 12 > %241 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %240) > %242 = fmul float %238, %241 > %243 = shl i32 %69, 4 > %244 = or i32 %243, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %244) > %246 = shl i32 %69, 4 > %247 = or i32 %246, 8 > %248 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %247) > %249 = fsub float -0.000000e+00, %242 > %250 = call float @llvm.fma.f32(float %245, float %248, float %249) > %251 = shl i32 %69, 4 > %252 = or i32 %251, 4 > %253 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %252) > %254 = shl i32 %69, 4 > %255 = or i32 %254, 8 > %256 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %255) > %257 = call float @llvm.fma.f32(float %253, float %256, float %242) > %258 = fmul float %257, %51 > %259 = fmul float %258, 2.000000e+00 > %260 = fmul float %250, %51 > %261 = fmul float %260, 2.000000e+00 > %262 = shl i32 %69, 4 > %263 = or i32 %262, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %263) > %265 = shl i32 %69, 4 > %266 = or i32 %265, 8 > %267 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %266) > %268 = shl i32 %69, 4 > %269 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %268) > %270 = shl i32 %69, 4 > %271 = or i32 %270, 12 > %272 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %271) > %273 = fmul float %267, %272 > %274 = fmul float %267, %269 > %275 = fmul float %264, %272 > %276 = shl i32 %69, 4 > %277 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %276) > %278 = shl i32 %69, 4 > %279 = or i32 %278, 4 > %280 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %279) > %281 = call float @llvm.fma.f32(float %277, float %280, float %273) > %282 = fmul float %281, %51 > %283 = fmul float %282, 2.000000e+00 > %284 = shl i32 %69, 4 > %285 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %284) > %286 = shl i32 %69, 4 > %287 = or i32 %286, 4 > %288 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %287) > %289 = shl i32 %69, 4 > %290 = or i32 %289, 8 > %291 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %290) > %292 = shl i32 %69, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %292) > %294 = shl i32 %69, 4 > %295 = or i32 %294, 4 > %296 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %295) > %297 = shl i32 %69, 4 > %298 = or i32 %297, 8 > %299 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %298) > %300 = fmul float %285, %293 > %301 = fmul float %288, %296 > %302 = fmul float %291, %299 > %303 = fadd float %302, %301 > %304 = fadd float %302, %300 > %305 = fadd float %301, %300 > %306 = fsub float -0.000000e+00, %303 > %307 = call float @llvm.fma.f32(float %306, float 2.000000e+00, float 1.000000e+00) > %308 = fsub float -0.000000e+00, %304 > %309 = call float @llvm.fma.f32(float %308, float 2.000000e+00, float 1.000000e+00) > %310 = fsub float -0.000000e+00, %305 > %311 = call float @llvm.fma.f32(float %310, float 2.000000e+00, float 1.000000e+00) > %312 = fmul float %51, %309 > %313 = fadd float %229, %283 > %314 = fadd float %230, %312 > %315 = fadd float %231, %261 > %316 = fadd float %232, %236 > %317 = fmul float %313, %43 > %318 = fmul float %314, %44 > %319 = fadd float %317, %318 > %320 = fmul float %315, %45 > %321 = fadd float %319, %320 > %322 = fadd float %321, %316 > %323 = shl i32 %65, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %323) > %325 = shl i32 %65, 4 > %326 = or i32 %325, 8 > %327 = call float 
@llvm.SI.load.const(<16 x i8> %39, i32 %326) > %328 = fsub float -0.000000e+00, %140 > %329 = call float @llvm.fma.f32(float %324, float %327, float %328) > %330 = fmul float %329, %49 > %331 = fmul float %330, 2.000000e+00 > %332 = fmul float %99, 2.000000e+00 > %333 = shl i32 %67, 4 > %334 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %333) > %335 = shl i32 %67, 4 > %336 = or i32 %335, 8 > %337 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %336) > %338 = fsub float -0.000000e+00, %191 > %339 = call float @llvm.fma.f32(float %334, float %337, float %338) > %340 = fmul float %339, %50 > %341 = fmul float %340, 2.000000e+00 > %342 = fmul float %49, %176 > %343 = fmul float %49, %172 > %344 = fmul float %50, %227 > %345 = fmul float %50, %223 > %346 = shl i32 %61, 5 > %347 = or i32 %346, 8 > %348 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %347) > %349 = fmul float %49, %348 > %350 = shl i32 %62, 5 > %351 = or i32 %350, 8 > %352 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %351) > %353 = fmul float %50, %352 > %354 = fadd float %341, %331 > %355 = fadd float %124, %332 > %356 = fadd float %344, %342 > %357 = fadd float %353, %349 > %358 = shl i32 %69, 4 > %359 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %358) > %360 = shl i32 %69, 4 > %361 = or i32 %360, 8 > %362 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %361) > %363 = fsub float -0.000000e+00, %275 > %364 = call float @llvm.fma.f32(float %359, float %362, float %363) > %365 = fmul float %364, %51 > %366 = fmul float %365, 2.000000e+00 > %367 = fmul float %51, %311 > %368 = fmul float %51, %307 > %369 = shl i32 %63, 5 > %370 = or i32 %369, 8 > %371 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %370) > %372 = fmul float %51, %371 > %373 = fadd float %354, %366 > %374 = fadd float %355, %259 > %375 = fadd float %356, %367 > %376 = fadd float %357, %372 > %377 = fmul float %373, %43 > %378 = fmul float %374, %44 > %379 = fadd float %377, %378 > %380 = fmul float %375, %45 > %381 = fadd float %379, %380 > %382 = fadd float %381, %376 > %383 = shl i32 %61, 5 > %384 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %383) > %385 = fmul float %49, %384 > %386 = shl i32 %62, 5 > %387 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %386) > %388 = fmul float %50, %387 > %389 = shl i32 %63, 5 > %390 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %389) > %391 = fmul float %51, %390 > %392 = shl i32 %65, 4 > %393 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %392) > %394 = shl i32 %65, 4 > %395 = or i32 %394, 4 > %396 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %395) > %397 = fsub float -0.000000e+00, %138 > %398 = call float @llvm.fma.f32(float %393, float %396, float %397) > %399 = fadd float %140, %139 > %400 = fmul float %398, %49 > %401 = fmul float %399, %49 > %402 = fmul float %400, 2.000000e+00 > %403 = fmul float %401, 2.000000e+00 > %404 = shl i32 %67, 4 > %405 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %404) > %406 = shl i32 %67, 4 > %407 = or i32 %406, 4 > %408 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %407) > %409 = fsub float -0.000000e+00, %189 > %410 = call float @llvm.fma.f32(float %405, float %408, float %409) > %411 = shl i32 %69, 4 > %412 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %411) > %413 = shl i32 %69, 4 > %414 = or i32 %413, 4 > %415 = call float @llvm.SI.load.const(<16 x i8> %39, i32 %414) > %416 = fsub float -0.000000e+00, %273 > %417 = call float @llvm.fma.f32(float %412, float %415, float %416) > %418 = fadd float %275, 
%274 > %419 = fmul float %410, %50 > %420 = fmul float %417, %51 > %421 = fmul float %418, %51 > %422 = fmul float %420, 2.000000e+00 > %423 = fmul float %421, 2.000000e+00 > %424 = fadd float %191, %190 > %425 = fmul float %424, %50 > %426 = fmul float %419, 2.000000e+00 > %427 = fmul float %425, 2.000000e+00 > %428 = fadd float %343, %345 > %429 = fadd float %402, %426 > %430 = fadd float %403, %427 > %431 = fadd float %385, %388 > %432 = fadd float %368, %428 > %433 = fadd float %422, %429 > %434 = fadd float %423, %430 > %435 = fadd float %391, %431 > %436 = fmul float %432, %43 > %437 = fmul float %433, %44 > %438 = fadd float %436, %437 > %439 = fmul float %434, %45 > %440 = fadd float %438, %439 > %441 = fadd float %440, %435 > %442 = fmul float %30, %441 > %443 = fmul float %31, %322 > %444 = fadd float %442, %443 > %445 = fmul float %32, %382 > %446 = fadd float %444, %445 > %447 = fadd float %446, %33 > %448 = fadd float %447, %21 > %449 = fmul float %22, %441 > %450 = fmul float %23, %322 > %451 = fadd float %449, %450 > %452 = fmul float %24, %382 > %453 = fadd float %451, %452 > %454 = fadd float %453, %25 > %455 = fmul float %26, %441 > %456 = fmul float %27, %322 > %457 = fadd float %455, %456 > %458 = fmul float %28, %382 > %459 = fadd float %457, %458 > %460 = fadd float %459, %29 > %461 = fmul float %34, %441 > %462 = fmul float %35, %322 > %463 = fadd float %461, %462 > %464 = fmul float %36, %382 > %465 = fadd float %463, %464 > %466 = fadd float %465, %37 > %467 = bitcast i32 %11 to float > %468 = insertvalue <{ float, float, float }> undef, float %467, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %454, float %460, float %448, float %466) > ret <{ float, float, float }> %468 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL CONST[1][0..23] >DCL CONST[2][0..4095] >DCL CONST[3][0..24] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {352, 48, 176, 2} >IMM[4] UINT32 {384, 112, 128, 144} >IMM[5] UINT32 {160, 368, 0, 0} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, 
CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, 
TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, 
TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL 
TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA 
TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[3].x, CONST[1][2], TEMP[18] >356: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][22].xxxx >357: MOV TEMP[1].z, TEMP[3].xxxx >358: DP4 TEMP[1].x, CONST[1][0], TEMP[18] >359: DP4 TEMP[3].x, CONST[1][1], TEMP[18] >360: MOV TEMP[1].y, TEMP[3].xxxx >361: DP4 TEMP[3].x, CONST[1][3], TEMP[18] >362: MOV TEMP[1].w, TEMP[3].xxxx >363: ADD TEMP[2].xyz, -TEMP[18].xyzz, CONST[1][11].xyzz >364: MOV TEMP[6], TEMP[1] >365: MOV TEMP[8].zw, TEMP[1].wwzw >366: MUL TEMP[3].xy, TEMP[3].xxxx, CONST[3][24].xyyy >367: MOV TEMP[9].xy, IN[2].xyxx >368: MUL TEMP[5].xy, IMM[0].zwww, CONST[3][24].xyyy >369: FMA TEMP[8].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[3].xyyy >370: DP3 TEMP[3].x, CONST[1][7].xyzz, TEMP[2].xyzz >371: DP3 TEMP[5].x, CONST[1][8].xyzz, TEMP[2].xyzz >372: MOV TEMP[3].y, TEMP[5].xxxx >373: DP3 TEMP[5].x, CONST[1][9].xyzz, TEMP[2].xyzz >374: MOV TEMP[3].z, TEMP[5].xxxx >375: DP3 TEMP[5].x, TEMP[4].xyzz, IN[3].xyzz >376: MOV TEMP[1].y, TEMP[5].xxxx >377: DP3 TEMP[5].x, TEMP[7].xyzz, IN[3].xyzz >378: MOV TEMP[1].z, TEMP[5].xxxx >379: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >380: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[1].xyzz >381: RSQ TEMP[5].x, TEMP[5].xxxx >382: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].xyzz >383: DP3 TEMP[5].x, CONST[1][7].xyzz, TEMP[1].xyzz >384: DP3 TEMP[10].x, TEMP[4].xyzz, IN[4].xyzz >385: MOV TEMP[2].y, TEMP[10].xxxx >386: DP3 TEMP[10].x, TEMP[4].xyzz, IN[1].xyzz >387: MOV TEMP[4].y, TEMP[10].xxxx >388: DP3 TEMP[10].x, TEMP[7].xyzz, IN[4].xyzz >389: MOV TEMP[2].z, TEMP[10].xxxx >390: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >391: MOV TEMP[4].z, TEMP[7].xxxx >392: DP3 TEMP[2].x, TEMP[0].xyzz, IN[4].xyzz >393: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >394: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz >395: RSQ TEMP[7].x, TEMP[0].xxxx >396: MUL TEMP[0].xyz, TEMP[7].xxxx, TEMP[2].xyzz >397: DP3 TEMP[7].x, CONST[1][7].xyzz, TEMP[0].xyzz >398: MOV TEMP[5].y, TEMP[7].xxxx >399: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz >400: RSQ TEMP[7].x, TEMP[7].xxxx >401: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz >402: DP3 TEMP[4].x, CONST[1][7].xyzz, TEMP[2].xyzz >403: MOV TEMP[5].z, TEMP[4].xxxx >404: DP3 TEMP[4].x, CONST[1][8].xyzz, TEMP[1].xyzz >405: DP3 TEMP[1].x, CONST[1][9].xyzz, TEMP[1].xyzz >406: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[0].xyzz >407: MOV TEMP[4].y, TEMP[7].xxxx >408: DP3 TEMP[7].x, CONST[1][9].xyzz, TEMP[0].xyzz >409: MOV TEMP[1].y, TEMP[7].xxxx >410: DP3 TEMP[7].x, CONST[1][8].xyzz, TEMP[2].xyzz >411: MOV TEMP[4].z, TEMP[7].xxxx 
>412: DP3 TEMP[2].x, CONST[1][9].xyzz, TEMP[2].xyzz >413: MOV TEMP[1].z, TEMP[2].xxxx >414: MUL TEMP[0], IN[5], CONST[1][10] >415: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[1][23].xyzz >416: MOV TEMP[2].w, TEMP[0].wwww >417: MOV OUT[7], TEMP[2] >418: MOV OUT[6], TEMP[1] >419: MOV OUT[5], TEMP[4] >420: MOV OUT[4], TEMP[5] >421: MOV OUT[3], TEMP[3] >422: MOV OUT[1], TEMP[9] >423: MOV OUT[2], TEMP[8] >424: MOV OUT[0], TEMP[6] >425: END >radeonsi: Compiling shader 342 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 20) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 24) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 32) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 36) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 40) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 44) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 60) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) > %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 136) > %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) > %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) > %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) > %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) > %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) > %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168) > %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) > %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176) > %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180) > %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184) > %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 352) > %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376) > %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 > %61 = getelementptr [16 x <16 x 
i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 3 > %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 > %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 384) > %64 = call float @llvm.SI.load.const(<16 x i8> %62, i32 388) > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %13) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %14) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %15) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 > %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %16) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %17) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 > %96 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %18) > %97 = extractelement <4 x float> %96, i32 0 > %98 = extractelement <4 x float> %96, i32 1 > %99 = extractelement <4 x float> %96, i32 2 > %100 = extractelement <4 x float> %96, i32 3 > %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 > %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %19) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 > %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %20) > %110 = extractelement <4 x float> %109, i32 0 > %111 = extractelement <4 x float> %109, i32 1 > %112 = extractelement <4 x float> %109, i32 2 > %113 = fmul float %112, 0x406FE01000000000 > %114 = fmul float %111, 0x406FE01000000000 > %115 = fmul float %110, 0x406FE01000000000 > %116 = fptosi float %113 to i32 > %117 = fptosi float %114 to i32 > %118 = fptosi float %115 to i32 > %119 = shl i32 %116, 1 > 
%120 = or i32 %119, 1 > %121 = shl i32 %117, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %118, 1 > %124 = or i32 %123, 1 > %125 = shl i32 %116, 5 > %126 = or i32 %125, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %126) > %128 = fmul float %104, %127 > %129 = shl i32 %117, 5 > %130 = or i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %130) > %132 = fmul float %105, %131 > %133 = shl i32 %120, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %133) > %135 = shl i32 %120, 4 > %136 = or i32 %135, 12 > %137 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %136) > %138 = fmul float %134, %137 > %139 = shl i32 %120, 4 > %140 = or i32 %139, 4 > %141 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %140) > %142 = shl i32 %120, 4 > %143 = or i32 %142, 8 > %144 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %143) > %145 = fsub float -0.000000e+00, %138 > %146 = call float @llvm.fma.f32(float %141, float %144, float %145) > %147 = shl i32 %120, 4 > %148 = or i32 %147, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %148) > %150 = shl i32 %120, 4 > %151 = or i32 %150, 8 > %152 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %151) > %153 = call float @llvm.fma.f32(float %149, float %152, float %138) > %154 = fmul float %153, %104 > %155 = fmul float %146, %104 > %156 = fmul float %155, 2.000000e+00 > %157 = shl i32 %122, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %157) > %159 = shl i32 %122, 4 > %160 = or i32 %159, 12 > %161 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %160) > %162 = fmul float %158, %161 > %163 = shl i32 %122, 4 > %164 = or i32 %163, 4 > %165 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %164) > %166 = shl i32 %122, 4 > %167 = or i32 %166, 8 > %168 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %167) > %169 = fsub float -0.000000e+00, %162 > %170 = call float @llvm.fma.f32(float %165, float %168, float %169) > %171 = shl i32 %122, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %172) > %174 = shl i32 %122, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %175) > %177 = call float @llvm.fma.f32(float %173, float %176, float %162) > %178 = fmul float %177, %105 > %179 = fmul float %178, 2.000000e+00 > %180 = fmul float %170, %105 > %181 = fmul float %180, 2.000000e+00 > %182 = shl i32 %120, 4 > %183 = or i32 %182, 4 > %184 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %183) > %185 = shl i32 %120, 4 > %186 = or i32 %185, 8 > %187 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %186) > %188 = shl i32 %120, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %188) > %190 = shl i32 %120, 4 > %191 = or i32 %190, 12 > %192 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %191) > %193 = fmul float %187, %192 > %194 = fmul float %187, %189 > %195 = fmul float %184, %192 > %196 = shl i32 %120, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %196) > %198 = shl i32 %120, 4 > %199 = or i32 %198, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %199) > %201 = call float @llvm.fma.f32(float %197, float %200, float %193) > %202 = fmul float %201, %104 > %203 = fmul float %202, 2.000000e+00 > %204 = shl i32 %120, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %204) > %206 = shl i32 %120, 4 > %207 = or i32 %206, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %207) > %209 = shl i32 %120, 4 > %210 = or i32 %209, 8 > %211 = call float 
@llvm.SI.load.const(<16 x i8> %60, i32 %210) > %212 = shl i32 %120, 4 > %213 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %212) > %214 = shl i32 %120, 4 > %215 = or i32 %214, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %215) > %217 = shl i32 %120, 4 > %218 = or i32 %217, 8 > %219 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %218) > %220 = fmul float %205, %213 > %221 = fmul float %208, %216 > %222 = fmul float %211, %219 > %223 = fadd float %222, %221 > %224 = fadd float %222, %220 > %225 = fadd float %221, %220 > %226 = fsub float -0.000000e+00, %223 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %224 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fsub float -0.000000e+00, %225 > %231 = call float @llvm.fma.f32(float %230, float 2.000000e+00, float 1.000000e+00) > %232 = fmul float %104, %229 > %233 = shl i32 %122, 4 > %234 = or i32 %233, 4 > %235 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %234) > %236 = shl i32 %122, 4 > %237 = or i32 %236, 8 > %238 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %237) > %239 = shl i32 %122, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %239) > %241 = shl i32 %122, 4 > %242 = or i32 %241, 12 > %243 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %242) > %244 = fmul float %238, %243 > %245 = fmul float %238, %240 > %246 = fmul float %235, %243 > %247 = shl i32 %122, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %247) > %249 = shl i32 %122, 4 > %250 = or i32 %249, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %250) > %252 = call float @llvm.fma.f32(float %248, float %251, float %244) > %253 = fmul float %252, %105 > %254 = fmul float %253, 2.000000e+00 > %255 = shl i32 %122, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %255) > %257 = shl i32 %122, 4 > %258 = or i32 %257, 4 > %259 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %258) > %260 = shl i32 %122, 4 > %261 = or i32 %260, 8 > %262 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %261) > %263 = shl i32 %122, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %263) > %265 = shl i32 %122, 4 > %266 = or i32 %265, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %266) > %268 = shl i32 %122, 4 > %269 = or i32 %268, 8 > %270 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %269) > %271 = fmul float %256, %264 > %272 = fmul float %259, %267 > %273 = fmul float %262, %270 > %274 = fadd float %273, %272 > %275 = fadd float %273, %271 > %276 = fadd float %272, %271 > %277 = fsub float -0.000000e+00, %274 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %275 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fsub float -0.000000e+00, %276 > %282 = call float @llvm.fma.f32(float %281, float 2.000000e+00, float 1.000000e+00) > %283 = fmul float %105, %280 > %284 = fadd float %203, %254 > %285 = fadd float %232, %283 > %286 = fadd float %156, %181 > %287 = fadd float %128, %132 > %288 = shl i32 %118, 5 > %289 = or i32 %288, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %289) > %291 = fmul float %106, %290 > %292 = shl i32 %124, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %292) > %294 = shl i32 %124, 4 > %295 = or i32 %294, 12 > %296 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %295) > %297 = fmul float 
%293, %296 > %298 = shl i32 %124, 4 > %299 = or i32 %298, 4 > %300 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %299) > %301 = shl i32 %124, 4 > %302 = or i32 %301, 8 > %303 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %302) > %304 = fsub float -0.000000e+00, %297 > %305 = call float @llvm.fma.f32(float %300, float %303, float %304) > %306 = shl i32 %124, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %307) > %309 = shl i32 %124, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %310) > %312 = call float @llvm.fma.f32(float %308, float %311, float %297) > %313 = fmul float %312, %106 > %314 = fmul float %313, 2.000000e+00 > %315 = fmul float %305, %106 > %316 = fmul float %315, 2.000000e+00 > %317 = shl i32 %124, 4 > %318 = or i32 %317, 4 > %319 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %318) > %320 = shl i32 %124, 4 > %321 = or i32 %320, 8 > %322 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %321) > %323 = shl i32 %124, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %323) > %325 = shl i32 %124, 4 > %326 = or i32 %325, 12 > %327 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %326) > %328 = fmul float %322, %327 > %329 = fmul float %322, %324 > %330 = fmul float %319, %327 > %331 = shl i32 %124, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %331) > %333 = shl i32 %124, 4 > %334 = or i32 %333, 4 > %335 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %334) > %336 = call float @llvm.fma.f32(float %332, float %335, float %328) > %337 = fmul float %336, %106 > %338 = fmul float %337, 2.000000e+00 > %339 = shl i32 %124, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %339) > %341 = shl i32 %124, 4 > %342 = or i32 %341, 4 > %343 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %342) > %344 = shl i32 %124, 4 > %345 = or i32 %344, 8 > %346 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %345) > %347 = shl i32 %124, 4 > %348 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %347) > %349 = shl i32 %124, 4 > %350 = or i32 %349, 4 > %351 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %350) > %352 = shl i32 %124, 4 > %353 = or i32 %352, 8 > %354 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %353) > %355 = fmul float %340, %348 > %356 = fmul float %343, %351 > %357 = fmul float %346, %354 > %358 = fadd float %357, %356 > %359 = fadd float %357, %355 > %360 = fadd float %356, %355 > %361 = fsub float -0.000000e+00, %358 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %359 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fsub float -0.000000e+00, %360 > %366 = call float @llvm.fma.f32(float %365, float 2.000000e+00, float 1.000000e+00) > %367 = fmul float %106, %364 > %368 = fadd float %284, %338 > %369 = fadd float %285, %367 > %370 = fadd float %286, %316 > %371 = fadd float %287, %291 > %372 = fmul float %368, %68 > %373 = fmul float %369, %69 > %374 = fadd float %372, %373 > %375 = fmul float %370, %70 > %376 = fadd float %374, %375 > %377 = fadd float %376, %371 > %378 = shl i32 %120, 4 > %379 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %378) > %380 = shl i32 %120, 4 > %381 = or i32 %380, 8 > %382 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %381) > %383 = fsub float -0.000000e+00, %195 > %384 = call float @llvm.fma.f32(float %379, float %382, float %383) > %385 = fmul float %384, %104 > %386 = fmul 
float %385, 2.000000e+00 > %387 = fmul float %154, 2.000000e+00 > %388 = shl i32 %122, 4 > %389 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %388) > %390 = shl i32 %122, 4 > %391 = or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %391) > %393 = fsub float -0.000000e+00, %246 > %394 = call float @llvm.fma.f32(float %389, float %392, float %393) > %395 = fmul float %394, %105 > %396 = fmul float %395, 2.000000e+00 > %397 = fmul float %104, %231 > %398 = fmul float %104, %227 > %399 = fmul float %105, %282 > %400 = fmul float %105, %278 > %401 = shl i32 %116, 5 > %402 = or i32 %401, 8 > %403 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %402) > %404 = fmul float %104, %403 > %405 = shl i32 %117, 5 > %406 = or i32 %405, 8 > %407 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %406) > %408 = fmul float %105, %407 > %409 = fadd float %396, %386 > %410 = fadd float %179, %387 > %411 = fadd float %399, %397 > %412 = fadd float %408, %404 > %413 = shl i32 %124, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %413) > %415 = shl i32 %124, 4 > %416 = or i32 %415, 8 > %417 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %416) > %418 = fsub float -0.000000e+00, %330 > %419 = call float @llvm.fma.f32(float %414, float %417, float %418) > %420 = fmul float %419, %106 > %421 = fmul float %420, 2.000000e+00 > %422 = fmul float %106, %366 > %423 = fmul float %106, %362 > %424 = shl i32 %118, 5 > %425 = or i32 %424, 8 > %426 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %425) > %427 = fmul float %106, %426 > %428 = fadd float %409, %421 > %429 = fadd float %410, %314 > %430 = fadd float %411, %422 > %431 = fadd float %412, %427 > %432 = fmul float %428, %68 > %433 = fmul float %429, %69 > %434 = fadd float %432, %433 > %435 = fmul float %430, %70 > %436 = fadd float %434, %435 > %437 = fadd float %436, %431 > %438 = shl i32 %116, 5 > %439 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %438) > %440 = fmul float %104, %439 > %441 = shl i32 %117, 5 > %442 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %441) > %443 = fmul float %105, %442 > %444 = shl i32 %118, 5 > %445 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %444) > %446 = fmul float %106, %445 > %447 = shl i32 %120, 4 > %448 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %447) > %449 = shl i32 %120, 4 > %450 = or i32 %449, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %450) > %452 = fsub float -0.000000e+00, %193 > %453 = call float @llvm.fma.f32(float %448, float %451, float %452) > %454 = fadd float %195, %194 > %455 = fmul float %453, %104 > %456 = fmul float %454, %104 > %457 = fmul float %455, 2.000000e+00 > %458 = fmul float %456, 2.000000e+00 > %459 = shl i32 %122, 4 > %460 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %459) > %461 = shl i32 %122, 4 > %462 = or i32 %461, 4 > %463 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %462) > %464 = fsub float -0.000000e+00, %244 > %465 = call float @llvm.fma.f32(float %460, float %463, float %464) > %466 = shl i32 %124, 4 > %467 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %466) > %468 = shl i32 %124, 4 > %469 = or i32 %468, 4 > %470 = call float @llvm.SI.load.const(<16 x i8> %60, i32 %469) > %471 = fsub float -0.000000e+00, %328 > %472 = call float @llvm.fma.f32(float %467, float %470, float %471) > %473 = fadd float %330, %329 > %474 = fmul float %465, %105 > %475 = fmul float %472, %106 > %476 = fmul float %473, %106 > %477 = fmul float %475, 2.000000e+00 > %478 = fmul float %476, 
2.000000e+00 > %479 = fadd float %246, %245 > %480 = fmul float %479, %105 > %481 = fmul float %474, 2.000000e+00 > %482 = fmul float %480, 2.000000e+00 > %483 = fadd float %398, %400 > %484 = fadd float %457, %481 > %485 = fadd float %458, %482 > %486 = fadd float %440, %443 > %487 = fadd float %423, %483 > %488 = fadd float %477, %484 > %489 = fadd float %478, %485 > %490 = fadd float %446, %486 > %491 = fmul float %487, %68 > %492 = fmul float %488, %69 > %493 = fadd float %491, %492 > %494 = fmul float %489, %70 > %495 = fadd float %493, %494 > %496 = fadd float %495, %490 > %497 = fmul float %31, %496 > %498 = fmul float %32, %377 > %499 = fadd float %497, %498 > %500 = fmul float %33, %437 > %501 = fadd float %499, %500 > %502 = fadd float %501, %34 > %503 = fmul float %502, %55 > %504 = fmul float %23, %496 > %505 = fmul float %24, %377 > %506 = fadd float %504, %505 > %507 = fmul float %25, %437 > %508 = fadd float %506, %507 > %509 = fadd float %508, %26 > %510 = fmul float %27, %496 > %511 = fmul float %28, %377 > %512 = fadd float %510, %511 > %513 = fmul float %29, %437 > %514 = fadd float %512, %513 > %515 = fadd float %514, %30 > %516 = fmul float %35, %496 > %517 = fmul float %36, %377 > %518 = fadd float %516, %517 > %519 = fmul float %37, %437 > %520 = fadd float %518, %519 > %521 = fadd float %520, %38 > %522 = fsub float %52, %496 > %523 = fsub float %53, %377 > %524 = fsub float %54, %437 > %525 = fmul float %521, %63 > %526 = fmul float %521, %64 > %527 = fsub float -0.000000e+00, %64 > %528 = call float @llvm.fma.f32(float %509, float %63, float %525) > %529 = call float @llvm.fma.f32(float %515, float %527, float %526) > %530 = fmul float %39, %522 > %531 = fmul float %40, %523 > %532 = fadd float %531, %530 > %533 = fmul float %41, %524 > %534 = fadd float %532, %533 > %535 = fmul float %42, %522 > %536 = fmul float %43, %523 > %537 = fadd float %536, %535 > %538 = fmul float %44, %524 > %539 = fadd float %537, %538 > %540 = fmul float %45, %522 > %541 = fmul float %46, %523 > %542 = fadd float %541, %540 > %543 = fmul float %47, %524 > %544 = fadd float %542, %543 > %545 = fmul float %368, %85 > %546 = fmul float %369, %86 > %547 = fadd float %546, %545 > %548 = fmul float %370, %87 > %549 = fadd float %547, %548 > %550 = fmul float %428, %85 > %551 = fmul float %429, %86 > %552 = fadd float %551, %550 > %553 = fmul float %430, %87 > %554 = fadd float %552, %553 > %555 = fmul float %487, %85 > %556 = fmul float %488, %86 > %557 = fadd float %556, %555 > %558 = fmul float %489, %87 > %559 = fadd float %557, %558 > %560 = fmul float %559, %559 > %561 = fmul float %549, %549 > %562 = fadd float %561, %560 > %563 = fmul float %554, %554 > %564 = fadd float %562, %563 > %565 = call float @llvm.AMDGPU.rsq.clamped.f32(float %564) > %566 = fmul float %565, %559 > %567 = fmul float %565, %549 > %568 = fmul float %565, %554 > %569 = fmul float %39, %566 > %570 = fmul float %40, %567 > %571 = fadd float %570, %569 > %572 = fmul float %41, %568 > %573 = fadd float %571, %572 > %574 = fmul float %368, %91 > %575 = fmul float %369, %92 > %576 = fadd float %575, %574 > %577 = fmul float %370, %93 > %578 = fadd float %576, %577 > %579 = fmul float %368, %74 > %580 = fmul float %369, %75 > %581 = fadd float %580, %579 > %582 = fmul float %370, %76 > %583 = fadd float %581, %582 > %584 = fmul float %428, %91 > %585 = fmul float %429, %92 > %586 = fadd float %585, %584 > %587 = fmul float %430, %93 > %588 = fadd float %586, %587 > %589 = fmul float %428, %74 > %590 = fmul float %429, 
%75 > %591 = fadd float %590, %589 > %592 = fmul float %430, %76 > %593 = fadd float %591, %592 > %594 = fmul float %487, %91 > %595 = fmul float %488, %92 > %596 = fadd float %595, %594 > %597 = fmul float %489, %93 > %598 = fadd float %596, %597 > %599 = fmul float %487, %74 > %600 = fmul float %488, %75 > %601 = fadd float %600, %599 > %602 = fmul float %489, %76 > %603 = fadd float %601, %602 > %604 = fmul float %598, %598 > %605 = fmul float %578, %578 > %606 = fadd float %605, %604 > %607 = fmul float %588, %588 > %608 = fadd float %606, %607 > %609 = call float @llvm.AMDGPU.rsq.clamped.f32(float %608) > %610 = fmul float %609, %598 > %611 = fmul float %609, %578 > %612 = fmul float %609, %588 > %613 = fmul float %39, %610 > %614 = fmul float %40, %611 > %615 = fadd float %614, %613 > %616 = fmul float %41, %612 > %617 = fadd float %615, %616 > %618 = fmul float %603, %603 > %619 = fmul float %583, %583 > %620 = fadd float %619, %618 > %621 = fmul float %593, %593 > %622 = fadd float %620, %621 > %623 = call float @llvm.AMDGPU.rsq.clamped.f32(float %622) > %624 = fmul float %623, %603 > %625 = fmul float %623, %583 > %626 = fmul float %623, %593 > %627 = fmul float %39, %624 > %628 = fmul float %40, %625 > %629 = fadd float %628, %627 > %630 = fmul float %41, %626 > %631 = fadd float %629, %630 > %632 = fmul float %42, %566 > %633 = fmul float %43, %567 > %634 = fadd float %633, %632 > %635 = fmul float %44, %568 > %636 = fadd float %634, %635 > %637 = fmul float %45, %566 > %638 = fmul float %46, %567 > %639 = fadd float %638, %637 > %640 = fmul float %47, %568 > %641 = fadd float %639, %640 > %642 = fmul float %42, %610 > %643 = fmul float %43, %611 > %644 = fadd float %643, %642 > %645 = fmul float %44, %612 > %646 = fadd float %644, %645 > %647 = fmul float %45, %610 > %648 = fmul float %46, %611 > %649 = fadd float %648, %647 > %650 = fmul float %47, %612 > %651 = fadd float %649, %650 > %652 = fmul float %42, %624 > %653 = fmul float %43, %625 > %654 = fadd float %653, %652 > %655 = fmul float %44, %626 > %656 = fadd float %654, %655 > %657 = fmul float %45, %624 > %658 = fmul float %46, %625 > %659 = fadd float %658, %657 > %660 = fmul float %47, %626 > %661 = fadd float %659, %660 > %662 = fmul float %97, %48 > %663 = fmul float %98, %49 > %664 = fmul float %99, %50 > %665 = fmul float %100, %51 > %666 = fmul float %662, %56 > %667 = fmul float %663, %57 > %668 = fmul float %664, %58 > %669 = bitcast i32 %11 to float > %670 = insertvalue <{ float, float, float }> undef, float %669, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %80, float %81, float %458, float %440) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %528, float %529, float %503, float %521) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %534, float %539, float %544, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %573, float %617, float %631, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %636, float %646, float %656, float %371) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %641, float %651, float %661, float %521) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %666, float %667, float %668, float %665) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %509, float %515, float %503, float %521) > ret <{ float, float, float }> %670 >} > >; Function Attrs: nounwind readnone >declare float 
@llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL CONST[1][0..25] >DCL TEMP[0..4], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 400, 1065353216, 384} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] FLT32 {158456325028528675187087900672.0000, -2.0000, 3.0000, 0.9961} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: MOV TEMP[0].xy, TEMP[1].xyxx > 4: FMA TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[1].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[2].xxxx > 6: SQRT TEMP[1].x, TEMP[1].xxxx > 7: MOV TEMP[0].z, TEMP[1].xxxx > 8: DP3 TEMP[1].x, IN[3].xyzz, TEMP[0].xyzz > 9: DP3 TEMP[2].x, IN[4].xyzz, TEMP[0].xyzz > 10: MOV TEMP[1].y, TEMP[2].xxxx > 11: DP3 TEMP[2].x, IN[5].xyzz, TEMP[0].xyzz > 12: MOV TEMP[1].z, TEMP[2].xxxx > 13: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[1].xyzz > 14: RSQ TEMP[2].x, TEMP[0].xxxx > 15: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 16: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz > 17: RSQ TEMP[2].x, TEMP[2].xxxx > 18: MUL TEMP[1].xyz, TEMP[2].xxxx, IN[2].xyzz > 19: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz > 20: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 21: ADD TEMP[0].x, TEMP[1].xxxx, IMM[0].yyyy > 22: ADD TEMP[1].x, -TEMP[0].xxxx, -CONST[1][25].xxxx > 23: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 24: ADD TEMP[2].xy, -CONST[1][25].xzzz, IMM[0].zzzz > 25: FSLT TEMP[3].x, IMM[0].wwww, TEMP[2].xxxx > 26: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx > 27: INEG TEMP[3].x, TEMP[3].xxxx > 28: FSNE TEMP[4].x, TEMP[2].xxxx, IMM[0].wwww > 29: UIF TEMP[4].xxxx :0 > 30: RCP TEMP[4].x, TEMP[2].xxxx > 31: ELSE :0 > 32: MOV TEMP[4].x, IMM[3].xxxx > 33: ENDIF > 34: AND TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx > 35: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx > 36: LG2 TEMP[1].x, TEMP[0].xxxx > 37: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][25].yyyy > 38: EX2 TEMP[1].x, TEMP[0].xxxx > 39: ADD TEMP[0].x, -TEMP[2].yyyy, TEMP[1].xxxx > 40: ADD TEMP[1].x, -TEMP[2].yyyy, IMM[0].zzzz > 41: FSNE TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww > 42: UIF TEMP[2].xxxx :0 > 43: RCP TEMP[1].x, TEMP[1].xxxx > 44: ELSE :0 > 45: MOV TEMP[1].x, IMM[3].xxxx > 46: ENDIF > 47: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx > 48: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 49: FMA TEMP[2].x, TEMP[1].xxxx, IMM[3].yyyy, IMM[3].zzzz > 50: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 
> 51: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx > 52: MOV TEMP[1].xy, IN[0].xyyy > 53: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D > 54: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].wwww > 55: MOV TEMP[1].xy, IN[1].xyyy > 56: TEX TEMP[1].w, TEMP[1], SAMP[2], 2D > 57: FSLT TEMP[1].x, IMM[3].wwww, TEMP[1].wwww > 58: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 59: INEG TEMP[1].x, TEMP[1].xxxx > 60: USNE TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 61: UIF TEMP[1].xxxx :0 > 62: MOV TEMP[1].x, IMM[1].xxxx > 63: ELSE :0 > 64: MOV TEMP[1].x, IMM[1].zzzz > 65: ENDIF > 66: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx > 67: MUL TEMP[0].x, TEMP[0].xxxx, IN[6].wwww > 68: MOV TEMP[0].w, TEMP[0].xxxx > 69: MOV TEMP[1].x, IMM[2].yyyy > 70: MOV TEMP[1].w, IMM[1].xxxx > 71: TXF TEMP[1].x, TEMP[1], SAMP[3], BUFFER > 72: MUL TEMP[0].xyz, TEMP[1].xxxx, CONST[1][24].yzww > 73: MOV OUT[0], TEMP[0] > 74: END >radeonsi: Compiling shader 343 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 388) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 392) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 396) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 400) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 404) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 408) > %31 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 > %33 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %34 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %33, i64 0, i64 3 > %35 = load <4 x i32>, <4 x i32> addrspace(2)* %34, align 16, !tbaa !0 > %36 = extractelement <8 x i32> %32, i32 7 > %37 = extractelement <4 x i32> %35, i32 0 > %38 = and i32 %37, %36 > %39 = insertelement <4 x i32> %35, i32 %38, i32 0 > %40 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 > %42 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %42, i64 0, i64 7 > %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 > %45 = extractelement <8 x i32> %41, i32 7 > %46 = extractelement <4 x i32> %44, i32 0 > %47 = and i32 %46, %45 > %48 = insertelement <4 x i32> %44, i32 %47, i32 0 > %49 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 > %51 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %52 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %51, i64 0, i64 
11 > %53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0 > %54 = extractelement <8 x i32> %50, i32 7 > %55 = extractelement <4 x i32> %53, i32 0 > %56 = and i32 %55, %54 > %57 = insertelement <4 x i32> %53, i32 %56, i32 0 > %58 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %59 = bitcast <8 x i32> addrspace(2)* %58 to <2 x i128> addrspace(2)* > %60 = load <2 x i128>, <2 x i128> addrspace(2)* %59, align 32, !tbaa !0 > %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %78 = bitcast float %61 to i32 > %79 = bitcast float %62 to i32 > %80 = insertelement <2 x i32> undef, i32 %78, i32 0 > %81 = insertelement <2 x i32> %80, i32 %79, i32 1 > %82 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %81, <8 x i32> %32, <4 x i32> %39, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %83 = extractelement <4 x float> %82, i32 1 > %84 = extractelement <4 x float> %82, i32 3 > %85 = call float @llvm.fma.f32(float %83, float 2.000000e+00, float -1.000000e+00) > %86 = call float @llvm.fma.f32(float %84, float 2.000000e+00, float -1.000000e+00) > %87 = fsub float -0.000000e+00, %85 > %88 = call float @llvm.fma.f32(float %87, float %85, float 1.000000e+00) > %89 = fsub float -0.000000e+00, %86 > %90 = call float @llvm.fma.f32(float %89, float %86, float %88) > %91 = call float @llvm.sqrt.f32(float %90) > %92 = fmul float %68, %85 > %93 = fmul float %69, %86 > %94 = fadd float %93, %92 > %95 = fmul float %70, %91 > %96 = fadd float %94, %95 > %97 = fmul float %71, %85 > %98 = fmul float %72, %86 > %99 = fadd float %98, %97 > %100 = fmul float %73, %91 > %101 = fadd float %99, %100 > %102 = fmul float %74, %85 > %103 = fmul float %75, %86 > %104 = fadd float %103, %102 > %105 = fmul float %76, %91 > %106 = fadd float %104, %105 > %107 = fmul float %96, %96 > %108 = fmul float %101, %101 > %109 = fadd float %108, %107 > %110 = fmul float %106, %106 > %111 = fadd float %109, %110 > %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) > %113 = fmul float %112, %96 > %114 = fmul float %112, %101 > %115 = fmul float %112, %106 > %116 = fmul float %65, %65 > %117 = fmul float %66, %66 > %118 = fadd float %117, %116 > %119 = fmul float %67, %67 > %120 = fadd float %118, %119 > %121 = call float @llvm.AMDGPU.rsq.clamped.f32(float %120) > %122 = fmul float 
%121, %65 > %123 = fmul float %121, %66 > %124 = fmul float %121, %67 > %125 = fmul float %113, %122 > %126 = fmul float %114, %123 > %127 = fadd float %126, %125 > %128 = fmul float %115, %124 > %129 = fadd float %127, %128 > %130 = call float @llvm.AMDGPU.clamp.(float %129, float 0.000000e+00, float 1.000000e+00) > %131 = fadd float %130, -1.000000e+00 > %132 = fsub float -0.000000e+00, %28 > %133 = fsub float %132, %131 > %134 = call float @llvm.AMDGPU.clamp.(float %133, float 0.000000e+00, float 1.000000e+00) > %135 = fsub float 1.000000e+00, %28 > %136 = fsub float 1.000000e+00, %30 > %137 = fcmp ogt float %135, 0.000000e+00 > %138 = fcmp une float %135, 0.000000e+00 > %139 = fdiv float 1.000000e+00, %135 > %140 = select i1 %138, float %139, float 0x4600000000000000 > %141 = select i1 %137, float %140, float 0.000000e+00 > %142 = fmul float %141, %134 > %143 = call float @llvm.log2.f32(float %142) > %144 = fmul float %143, %29 > %145 = call float @llvm.exp2.f32(float %144) > %146 = fsub float %145, %136 > %147 = fsub float 1.000000e+00, %136 > %148 = fcmp une float %147, 0.000000e+00 > %149 = fdiv float 1.000000e+00, %147 > %temp4.0 = select i1 %148, float %149, float 0x4600000000000000 > %150 = fmul float %temp4.0, %146 > %151 = call float @llvm.AMDGPU.clamp.(float %150, float 0.000000e+00, float 1.000000e+00) > %152 = call float @llvm.fma.f32(float %151, float -2.000000e+00, float 3.000000e+00) > %153 = fmul float %151, %151 > %154 = fmul float %153, %152 > %155 = bitcast float %61 to i32 > %156 = bitcast float %62 to i32 > %157 = insertelement <2 x i32> undef, i32 %155, i32 0 > %158 = insertelement <2 x i32> %157, i32 %156, i32 1 > %159 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %158, <8 x i32> %41, <4 x i32> %48, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %160 = extractelement <4 x float> %159, i32 3 > %161 = fmul float %154, %160 > %162 = bitcast float %63 to i32 > %163 = bitcast float %64 to i32 > %164 = insertelement <2 x i32> undef, i32 %162, i32 0 > %165 = insertelement <2 x i32> %164, i32 %163, i32 1 > %166 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %165, <8 x i32> %50, <4 x i32> %57, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %167 = extractelement <4 x float> %166, i32 3 > %168 = fcmp ogt float %167, 0x3FEFDFDFE0000000 > %. 
= select i1 %168, float 0.000000e+00, float 1.000000e+00 > %169 = fmul float %., %161 > %170 = fmul float %169, %77 > %171 = extractelement <2 x i128> %60, i32 1 > %172 = bitcast i128 %171 to <16 x i8> > %173 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %172, i32 0, i32 0) > %174 = extractelement <4 x float> %173, i32 0 > %175 = fmul float %174, %25 > %176 = fmul float %174, %26 > %177 = fmul float %174, %27 > %178 = bitcast float %5 to i32 > %179 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %178, 10 > %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %179, float %175, 11 > %181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %176, 12 > %182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %177, 13 > %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %182, float %170, 14 > %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL CONST[1][0..23] >DCL CONST[2][0..4095] >DCL CONST[3][0..24] >DCL TEMP[0..19], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -0.1500} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 32} >IMM[3] UINT32 {352, 48, 176, 128} >IMM[4] UINT32 {2, 288, 112, 144} >IMM[5] UINT32 {320, 256, 240, 304} >IMM[6] FLT32 { -1.5000, 0.0597, 0.0000, 0.0000} >IMM[7] FLT32 
{158456325028528675187087900672.0000, 1.4427, 0.5000, 0.4545} >IMM[8] UINT32 {224, 272, 384, 160} >IMM[9] FLT32 { -0.0040, 6.2000, 1.7000, 0.0600} >IMM[10] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[3].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[4].xxxx > 7: MOV TEMP[3].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[3].x, IN[2].xxxx, TEMP[3].yyyy > 9: MOV TEMP[3].w, TEMP[3].xxxx > 10: UMUL TEMP[4].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[5].xxxx > 13: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[4].x, IN[2].yyyy, TEMP[4].yyyy > 15: MOV TEMP[4].w, TEMP[4].xxxx > 16: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy > 17: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[6].xxxx > 19: MOV TEMP[5].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 21: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[7].xxxx > 23: MOV TEMP[6].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].wwww > 25: UMUL TEMP[6].x, TEMP[1].xxxx, IMM[2].yyyy > 26: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[7].xxxx > 28: MOV TEMP[6].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 30: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[8].xxxx > 32: MOV TEMP[7].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[6].x, TEMP[6].yyyy, TEMP[7].zzzz, -TEMP[5].xxxx > 34: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 35: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[8].xxxx > 37: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 39: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[9].xxxx > 41: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[5].x, TEMP[7].yyyy, TEMP[8].zzzz, TEMP[5].xxxx > 43: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].xxxx > 44: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].xxxx > 45: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy > 46: MOV TEMP[3].z, TEMP[6].xxxx > 47: UMUL TEMP[6].x, TEMP[1].yyyy, IMM[2].yyyy > 48: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[7].xxxx > 50: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 52: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[8].xxxx > 54: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 56: UMUL TEMP[7].x, TEMP[1].yyyy, IMM[2].yyyy > 57: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[8].xxxx > 59: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 61: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[9].xxxx > 63: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 65: UMUL TEMP[8].x, TEMP[1].yyyy, IMM[2].yyyy > 66: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[9].xxxx > 68: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[9].x, TEMP[1].yyyy, IMM[2].yyyy > 70: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[10].xxxx > 72: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 74: MUL TEMP[6].x, TEMP[6].xxxx, IN[2].yyyy > 75: MUL TEMP[6].x, IMM[0].yyyy, 
TEMP[6].xxxx > 76: MOV TEMP[6].y, TEMP[6].xxxx > 77: MUL TEMP[7].x, TEMP[7].xxxx, IN[2].yyyy > 78: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 79: MOV TEMP[4].z, TEMP[7].xxxx > 80: UMUL TEMP[7].x, TEMP[1].xxxx, IMM[2].yyyy > 81: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[8].xxxx > 83: MOV TEMP[7].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 85: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[9].xxxx > 87: MOV TEMP[8].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[7].xyz, TEMP[7].zzyy, TEMP[8].wxww > 89: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy > 90: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[9].xxxx > 92: MOV TEMP[8].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy > 94: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[10].xxxx > 96: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[8].x, TEMP[8].xxxx, TEMP[9].yyyy, TEMP[7].xxxx > 98: MUL TEMP[8].x, TEMP[8].xxxx, IN[2].xxxx > 99: MUL TEMP[3].x, IMM[0].yyyy, TEMP[8].xxxx >100: UMUL TEMP[8].x, TEMP[1].xxxx, IMM[2].yyyy >101: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[9].xxxx >103: MOV TEMP[8].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[9].x, TEMP[1].xxxx, IMM[2].yyyy >105: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[10].xxxx >107: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xyzz >109: ADD TEMP[8].xyz, TEMP[8].zzyy, TEMP[8].yxxx >110: FMA TEMP[9].xyz, -TEMP[8].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[10].x, IN[2].xxxx, TEMP[9].yyyy >112: MOV TEMP[3].y, TEMP[10].xxxx >113: UMUL TEMP[10].x, TEMP[1].yyyy, IMM[2].yyyy >114: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[11].xxxx >116: MOV TEMP[10].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >118: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[12].xxxx >120: MOV TEMP[11].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[10].xyz, TEMP[10].zzyy, TEMP[11].wxww >122: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >123: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[12].xxxx >125: MOV TEMP[11].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >127: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[13].xxxx >129: MOV TEMP[12].y, CONST[2][ADDR[0].x] >130: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[12].yyyy, TEMP[10].xxxx >131: MUL TEMP[11].x, TEMP[11].xxxx, IN[2].yyyy >132: MUL TEMP[4].x, IMM[0].yyyy, TEMP[11].xxxx >133: UMUL TEMP[11].x, TEMP[1].yyyy, IMM[2].yyyy >134: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[12].xxxx >136: MOV TEMP[11].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[12].x, TEMP[1].yyyy, IMM[2].yyyy >138: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[13].xxxx >140: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xyzz >142: ADD TEMP[11].xyz, TEMP[11].zzyy, TEMP[11].yxxx >143: FMA TEMP[12].xyz, -TEMP[11].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[13].x, IN[2].yyyy, TEMP[12].yyyy >145: MOV TEMP[4].y, TEMP[13].xxxx >146: ADD TEMP[3], TEMP[3], TEMP[4] >147: UMUL TEMP[13].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[14].xxxx >150: MOV TEMP[13].y, CONST[2][ADDR[0].x] >151: MUL TEMP[13].x, IN[2].zzzz, TEMP[13].yyyy >152: MOV TEMP[4].w, TEMP[13].xxxx >153: UMUL TEMP[13].x, TEMP[1].zzzz, IMM[2].yyyy >154: 
USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[14].xxxx >156: MOV TEMP[13].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >158: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[15].xxxx >160: MOV TEMP[14].w, CONST[2][ADDR[0].x] >161: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].wwww >162: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >163: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[15].xxxx >165: MOV TEMP[14].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >167: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[16].xxxx >169: MOV TEMP[15].z, CONST[2][ADDR[0].x] >170: FMA TEMP[14].x, TEMP[14].yyyy, TEMP[15].zzzz, -TEMP[13].xxxx >171: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >172: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[16].xxxx >174: MOV TEMP[15].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >176: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[17].xxxx >178: MOV TEMP[16].z, CONST[2][ADDR[0].x] >179: FMA TEMP[13].x, TEMP[15].yyyy, TEMP[16].zzzz, TEMP[13].xxxx >180: MUL TEMP[13].x, TEMP[13].xxxx, IN[2].zzzz >181: MUL TEMP[13].x, IMM[0].yyyy, TEMP[13].xxxx >182: MOV TEMP[13].y, TEMP[13].xxxx >183: MUL TEMP[14].x, TEMP[14].xxxx, IN[2].zzzz >184: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >185: MOV TEMP[4].z, TEMP[14].xxxx >186: UMUL TEMP[14].x, TEMP[1].zzzz, IMM[2].yyyy >187: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[15].xxxx >189: MOV TEMP[14].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >191: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[16].xxxx >193: MOV TEMP[15].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[14].xyz, TEMP[14].zzyy, TEMP[15].wxww >195: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >196: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[16].xxxx >198: MOV TEMP[15].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >200: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[17].xxxx >202: MOV TEMP[16].y, CONST[2][ADDR[0].x] >203: FMA TEMP[15].x, TEMP[15].xxxx, TEMP[16].yyyy, TEMP[14].xxxx >204: MUL TEMP[15].x, TEMP[15].xxxx, IN[2].zzzz >205: MUL TEMP[4].x, IMM[0].yyyy, TEMP[15].xxxx >206: UMUL TEMP[15].x, TEMP[1].zzzz, IMM[2].yyyy >207: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[16].xxxx >209: MOV TEMP[15].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[16].x, TEMP[1].zzzz, IMM[2].yyyy >211: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[17].xxxx >213: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[15].xyz, TEMP[15].xyzz, TEMP[16].xyzz >215: ADD TEMP[15].xyz, TEMP[15].zzyy, TEMP[15].yxxx >216: FMA TEMP[16].xyz, -TEMP[15].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[17].x, IN[2].zzzz, TEMP[16].yyyy >218: MOV TEMP[4].y, TEMP[17].xxxx >219: ADD TEMP[3], TEMP[3], TEMP[4] >220: MOV TEMP[4].xyz, IN[0].xyzx >221: MOV TEMP[4].w, IMM[0].zzzz >222: DP4 TEMP[17].x, TEMP[3], TEMP[4] >223: MOV TEMP[3].y, TEMP[17].xxxx >224: UMUL TEMP[17].x, TEMP[1].xxxx, IMM[2].yyyy >225: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[18].xxxx >227: MOV TEMP[17].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[18].x, TEMP[1].xxxx, IMM[2].yyyy >229: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[19].xxxx >231: MOV TEMP[18].z, CONST[2][ADDR[0].x] >232: FMA TEMP[17].x, 
TEMP[17].xxxx, TEMP[18].zzzz, -TEMP[7].zzzz >233: MUL TEMP[17].x, TEMP[17].xxxx, IN[2].xxxx >234: MUL TEMP[17].x, IMM[0].yyyy, TEMP[17].xxxx >235: MUL TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy >236: MOV TEMP[17].y, TEMP[5].xxxx >237: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >238: USHR TEMP[18].x, TEMP[5].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[18].xxxx >240: MOV TEMP[5].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[18].x, TEMP[1].yyyy, IMM[2].yyyy >242: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[19].xxxx >244: MOV TEMP[18].z, CONST[2][ADDR[0].x] >245: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[18].zzzz, -TEMP[10].zzzz >246: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].yyyy >247: MUL TEMP[6].x, IMM[0].yyyy, TEMP[5].xxxx >248: MUL TEMP[5].x, IN[2].xxxx, TEMP[9].zzzz >249: MOV TEMP[17].z, TEMP[5].xxxx >250: MUL TEMP[8].x, IN[2].xxxx, TEMP[9].xxxx >251: MUL TEMP[5].x, IN[2].yyyy, TEMP[12].zzzz >252: MOV TEMP[6].z, TEMP[5].xxxx >253: MUL TEMP[11].x, IN[2].yyyy, TEMP[12].xxxx >254: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[9].xxxx >257: MOV TEMP[5].z, CONST[2][ADDR[0].x] >258: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].zzzz >259: MOV TEMP[17].w, TEMP[5].xxxx >260: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[9].xxxx >263: MOV TEMP[5].z, CONST[2][ADDR[0].x] >264: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].zzzz >265: MOV TEMP[6].w, TEMP[5].xxxx >266: ADD TEMP[6], TEMP[6], TEMP[17] >267: UMUL TEMP[5].x, TEMP[1].zzzz, IMM[2].yyyy >268: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[9].xxxx >270: MOV TEMP[5].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[9].x, TEMP[1].zzzz, IMM[2].yyyy >272: USHR TEMP[12].x, TEMP[9].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[12].xxxx >274: MOV TEMP[9].z, CONST[2][ADDR[0].x] >275: FMA TEMP[5].x, TEMP[5].xxxx, TEMP[9].zzzz, -TEMP[14].zzzz >276: MUL TEMP[5].x, TEMP[5].xxxx, IN[2].zzzz >277: MUL TEMP[13].x, IMM[0].yyyy, TEMP[5].xxxx >278: MUL TEMP[5].x, IN[2].zzzz, TEMP[16].zzzz >279: MOV TEMP[13].z, TEMP[5].xxxx >280: MUL TEMP[15].x, IN[2].zzzz, TEMP[16].xxxx >281: UMUL TEMP[5].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[9].x, TEMP[5].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[9].xxxx >284: MOV TEMP[5].z, CONST[2][ADDR[0].x] >285: MUL TEMP[5].x, IN[2].zzzz, TEMP[5].zzzz >286: MOV TEMP[13].w, TEMP[5].xxxx >287: ADD TEMP[6], TEMP[6], TEMP[13] >288: DP4 TEMP[5].x, TEMP[6], TEMP[4] >289: MOV TEMP[3].z, TEMP[5].xxxx >290: UMUL TEMP[5].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[6].xxxx >293: MOV TEMP[5].x, CONST[2][ADDR[0].x] >294: MUL TEMP[5].x, IN[2].xxxx, TEMP[5].xxxx >295: MOV TEMP[8].w, TEMP[5].xxxx >296: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[6].xxxx >299: MOV TEMP[5].x, CONST[2][ADDR[0].x] >300: MUL TEMP[5].x, IN[2].yyyy, TEMP[5].xxxx >301: MOV TEMP[11].w, TEMP[5].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[5].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[2].zzzz, TEMP[2].xxxx >307: MOV TEMP[15].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[1].xxxx, IMM[2].yyyy >309: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[5].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy >313: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz 
>314: UARL ADDR[0].x, TEMP[6].xxxx >315: MOV TEMP[5].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[7].xxxx >317: ADD TEMP[2].x, TEMP[7].zzzz, TEMP[7].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[2].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[8].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[1].yyyy, IMM[2].yyyy >323: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[5].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy >327: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[6].xxxx >329: MOV TEMP[5].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].yyyy, -TEMP[10].xxxx >331: UMUL TEMP[2].x, TEMP[1].zzzz, IMM[2].yyyy >332: USHR TEMP[5].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[5].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[1].x, TEMP[1].zzzz, IMM[2].yyyy >336: USHR TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[5].xxxx >338: MOV TEMP[1].y, CONST[2][ADDR[0].x] >339: FMA TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy, -TEMP[14].xxxx >340: MOV TEMP[0].y, TEMP[1].xxxx >341: ADD TEMP[1].x, TEMP[14].zzzz, TEMP[14].yyyy >342: MOV TEMP[0].z, TEMP[1].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].yzzz >344: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[15].yz, TEMP[1].yxyy >346: ADD TEMP[1].x, TEMP[10].zzzz, TEMP[10].yyyy >347: MUL TEMP[1].x, TEMP[1].xxxx, IN[2].yyyy >348: MOV TEMP[0].y, TEMP[1].xxxx >349: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[11].yz, TEMP[1].yxyy >351: ADD TEMP[0], TEMP[8], TEMP[11] >352: ADD TEMP[0], TEMP[15], TEMP[0] >353: DP4 TEMP[3].x, TEMP[0], TEMP[4] >354: MOV TEMP[3].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][2], TEMP[3] >356: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][22].xxxx >357: MOV TEMP[0].z, TEMP[1].xxxx >358: DP4 TEMP[0].x, CONST[1][0], TEMP[3] >359: DP4 TEMP[1].x, CONST[1][1], TEMP[3] >360: MOV TEMP[0].y, TEMP[1].xxxx >361: DP4 TEMP[1].x, CONST[1][3], TEMP[3] >362: MOV TEMP[0].w, TEMP[1].xxxx >363: MOV TEMP[0], TEMP[0] >364: FSNE TEMP[1].x, CONST[3][15].yyyy, IMM[6].zzzz >365: UIF TEMP[1].xxxx :0 >366: ELSE :0 >367: ENDIF >368: FSNE TEMP[1].x, CONST[3][15].xxxx, IMM[6].zzzz >369: UIF TEMP[1].xxxx :0 >370: ELSE :0 >371: ENDIF >372: FSNE TEMP[1].x, CONST[3][14].wwww, IMM[6].zzzz >373: UIF TEMP[1].xxxx :0 >374: ELSE :0 >375: ENDIF >376: MOV OUT[0], TEMP[0] >377: END >radeonsi: Compiling shader 344 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) > %25 = call float @llvm.SI.load.const(<16 x i8> 
%18, i32 24) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) > %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 352) > %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 > %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 > %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %13) > %41 = extractelement <4 x float> %40, i32 0 > %42 = extractelement <4 x float> %40, i32 1 > %43 = extractelement <4 x float> %40, i32 2 > %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 > %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %15) > %47 = extractelement <4 x float> %46, i32 0 > %48 = extractelement <4 x float> %46, i32 1 > %49 = extractelement <4 x float> %46, i32 2 > %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 > %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %16) > %53 = extractelement <4 x float> %52, i32 0 > %54 = extractelement <4 x float> %52, i32 1 > %55 = extractelement <4 x float> %52, i32 2 > %56 = fmul float %55, 0x406FE01000000000 > %57 = fmul float %54, 0x406FE01000000000 > %58 = fmul float %53, 0x406FE01000000000 > %59 = fptosi float %56 to i32 > %60 = fptosi float %57 to i32 > %61 = fptosi float %58 to i32 > %62 = shl i32 %59, 1 > %63 = or i32 %62, 1 > %64 = shl i32 %60, 1 > %65 = or i32 %64, 1 > %66 = shl i32 %61, 1 > %67 = or i32 %66, 1 > %68 = shl i32 %59, 5 > %69 = or i32 %68, 4 > %70 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %69) > %71 = fmul float %47, %70 > %72 = shl i32 %60, 5 > %73 = or i32 %72, 4 > %74 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %73) > %75 = fmul float %48, %74 > %76 = shl i32 %63, 4 > %77 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %76) > %78 = shl i32 %63, 4 > %79 = or i32 %78, 12 > %80 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %79) > %81 = fmul float %77, %80 > %82 = shl i32 %63, 4 > %83 = or i32 %82, 4 > %84 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %83) > %85 = shl i32 %63, 4 > %86 = or i32 %85, 8 > %87 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %86) > %88 = fsub float -0.000000e+00, %81 > %89 = call float @llvm.fma.f32(float %84, float %87, float %88) > %90 = shl i32 %63, 4 > %91 = or i32 %90, 4 > %92 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %91) > %93 = shl i32 %63, 4 > %94 = or i32 %93, 8 > %95 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %94) > %96 = call float @llvm.fma.f32(float %92, float %95, float %81) > %97 = fmul float %96, %47 > %98 = fmul float %89, %47 > %99 = fmul float %98, 2.000000e+00 > %100 = shl i32 %65, 4 > %101 = call float @llvm.SI.load.const(<16 x i8> %37, 
i32 %100) > %102 = shl i32 %65, 4 > %103 = or i32 %102, 12 > %104 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %103) > %105 = fmul float %101, %104 > %106 = shl i32 %65, 4 > %107 = or i32 %106, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %107) > %109 = shl i32 %65, 4 > %110 = or i32 %109, 8 > %111 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %110) > %112 = fsub float -0.000000e+00, %105 > %113 = call float @llvm.fma.f32(float %108, float %111, float %112) > %114 = shl i32 %65, 4 > %115 = or i32 %114, 4 > %116 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %115) > %117 = shl i32 %65, 4 > %118 = or i32 %117, 8 > %119 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %118) > %120 = call float @llvm.fma.f32(float %116, float %119, float %105) > %121 = fmul float %120, %48 > %122 = fmul float %121, 2.000000e+00 > %123 = fmul float %113, %48 > %124 = fmul float %123, 2.000000e+00 > %125 = shl i32 %63, 4 > %126 = or i32 %125, 4 > %127 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %126) > %128 = shl i32 %63, 4 > %129 = or i32 %128, 8 > %130 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %129) > %131 = shl i32 %63, 4 > %132 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %131) > %133 = shl i32 %63, 4 > %134 = or i32 %133, 12 > %135 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %134) > %136 = fmul float %130, %135 > %137 = fmul float %130, %132 > %138 = fmul float %127, %135 > %139 = shl i32 %63, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %139) > %141 = shl i32 %63, 4 > %142 = or i32 %141, 4 > %143 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %142) > %144 = call float @llvm.fma.f32(float %140, float %143, float %136) > %145 = fmul float %144, %47 > %146 = fmul float %145, 2.000000e+00 > %147 = shl i32 %63, 4 > %148 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %147) > %149 = shl i32 %63, 4 > %150 = or i32 %149, 4 > %151 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %150) > %152 = shl i32 %63, 4 > %153 = or i32 %152, 8 > %154 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %153) > %155 = shl i32 %63, 4 > %156 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %155) > %157 = shl i32 %63, 4 > %158 = or i32 %157, 4 > %159 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %158) > %160 = shl i32 %63, 4 > %161 = or i32 %160, 8 > %162 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %161) > %163 = fmul float %148, %156 > %164 = fmul float %151, %159 > %165 = fmul float %154, %162 > %166 = fadd float %165, %164 > %167 = fadd float %165, %163 > %168 = fadd float %164, %163 > %169 = fsub float -0.000000e+00, %166 > %170 = call float @llvm.fma.f32(float %169, float 2.000000e+00, float 1.000000e+00) > %171 = fsub float -0.000000e+00, %167 > %172 = call float @llvm.fma.f32(float %171, float 2.000000e+00, float 1.000000e+00) > %173 = fsub float -0.000000e+00, %168 > %174 = call float @llvm.fma.f32(float %173, float 2.000000e+00, float 1.000000e+00) > %175 = fmul float %47, %172 > %176 = shl i32 %65, 4 > %177 = or i32 %176, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %177) > %179 = shl i32 %65, 4 > %180 = or i32 %179, 8 > %181 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %180) > %182 = shl i32 %65, 4 > %183 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %182) > %184 = shl i32 %65, 4 > %185 = or i32 %184, 12 > %186 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %185) > %187 = fmul float %181, %186 > %188 = fmul float %181, %183 > %189 = fmul float %178, %186 > %190 = shl i32 
%65, 4 > %191 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %190) > %192 = shl i32 %65, 4 > %193 = or i32 %192, 4 > %194 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %193) > %195 = call float @llvm.fma.f32(float %191, float %194, float %187) > %196 = fmul float %195, %48 > %197 = fmul float %196, 2.000000e+00 > %198 = shl i32 %65, 4 > %199 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %198) > %200 = shl i32 %65, 4 > %201 = or i32 %200, 4 > %202 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %201) > %203 = shl i32 %65, 4 > %204 = or i32 %203, 8 > %205 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %204) > %206 = shl i32 %65, 4 > %207 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %206) > %208 = shl i32 %65, 4 > %209 = or i32 %208, 4 > %210 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %209) > %211 = shl i32 %65, 4 > %212 = or i32 %211, 8 > %213 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %212) > %214 = fmul float %199, %207 > %215 = fmul float %202, %210 > %216 = fmul float %205, %213 > %217 = fadd float %216, %215 > %218 = fadd float %216, %214 > %219 = fadd float %215, %214 > %220 = fsub float -0.000000e+00, %217 > %221 = call float @llvm.fma.f32(float %220, float 2.000000e+00, float 1.000000e+00) > %222 = fsub float -0.000000e+00, %218 > %223 = call float @llvm.fma.f32(float %222, float 2.000000e+00, float 1.000000e+00) > %224 = fsub float -0.000000e+00, %219 > %225 = call float @llvm.fma.f32(float %224, float 2.000000e+00, float 1.000000e+00) > %226 = fmul float %48, %223 > %227 = fadd float %146, %197 > %228 = fadd float %175, %226 > %229 = fadd float %99, %124 > %230 = fadd float %71, %75 > %231 = shl i32 %61, 5 > %232 = or i32 %231, 4 > %233 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %232) > %234 = fmul float %49, %233 > %235 = shl i32 %67, 4 > %236 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %235) > %237 = shl i32 %67, 4 > %238 = or i32 %237, 12 > %239 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %238) > %240 = fmul float %236, %239 > %241 = shl i32 %67, 4 > %242 = or i32 %241, 4 > %243 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %242) > %244 = shl i32 %67, 4 > %245 = or i32 %244, 8 > %246 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %245) > %247 = fsub float -0.000000e+00, %240 > %248 = call float @llvm.fma.f32(float %243, float %246, float %247) > %249 = shl i32 %67, 4 > %250 = or i32 %249, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %250) > %252 = shl i32 %67, 4 > %253 = or i32 %252, 8 > %254 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %253) > %255 = call float @llvm.fma.f32(float %251, float %254, float %240) > %256 = fmul float %255, %49 > %257 = fmul float %256, 2.000000e+00 > %258 = fmul float %248, %49 > %259 = fmul float %258, 2.000000e+00 > %260 = shl i32 %67, 4 > %261 = or i32 %260, 4 > %262 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %261) > %263 = shl i32 %67, 4 > %264 = or i32 %263, 8 > %265 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %264) > %266 = shl i32 %67, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %266) > %268 = shl i32 %67, 4 > %269 = or i32 %268, 12 > %270 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %269) > %271 = fmul float %265, %270 > %272 = fmul float %265, %267 > %273 = fmul float %262, %270 > %274 = shl i32 %67, 4 > %275 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %274) > %276 = shl i32 %67, 4 > %277 = or i32 %276, 4 > %278 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %277) > %279 = call 
float @llvm.fma.f32(float %275, float %278, float %271) > %280 = fmul float %279, %49 > %281 = fmul float %280, 2.000000e+00 > %282 = shl i32 %67, 4 > %283 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %282) > %284 = shl i32 %67, 4 > %285 = or i32 %284, 4 > %286 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %285) > %287 = shl i32 %67, 4 > %288 = or i32 %287, 8 > %289 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %288) > %290 = shl i32 %67, 4 > %291 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %290) > %292 = shl i32 %67, 4 > %293 = or i32 %292, 4 > %294 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %293) > %295 = shl i32 %67, 4 > %296 = or i32 %295, 8 > %297 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %296) > %298 = fmul float %283, %291 > %299 = fmul float %286, %294 > %300 = fmul float %289, %297 > %301 = fadd float %300, %299 > %302 = fadd float %300, %298 > %303 = fadd float %299, %298 > %304 = fsub float -0.000000e+00, %301 > %305 = call float @llvm.fma.f32(float %304, float 2.000000e+00, float 1.000000e+00) > %306 = fsub float -0.000000e+00, %302 > %307 = call float @llvm.fma.f32(float %306, float 2.000000e+00, float 1.000000e+00) > %308 = fsub float -0.000000e+00, %303 > %309 = call float @llvm.fma.f32(float %308, float 2.000000e+00, float 1.000000e+00) > %310 = fmul float %49, %307 > %311 = fadd float %227, %281 > %312 = fadd float %228, %310 > %313 = fadd float %229, %259 > %314 = fadd float %230, %234 > %315 = fmul float %311, %41 > %316 = fmul float %312, %42 > %317 = fadd float %315, %316 > %318 = fmul float %313, %43 > %319 = fadd float %317, %318 > %320 = fadd float %319, %314 > %321 = shl i32 %63, 4 > %322 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %321) > %323 = shl i32 %63, 4 > %324 = or i32 %323, 8 > %325 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %324) > %326 = fsub float -0.000000e+00, %138 > %327 = call float @llvm.fma.f32(float %322, float %325, float %326) > %328 = fmul float %327, %47 > %329 = fmul float %328, 2.000000e+00 > %330 = fmul float %97, 2.000000e+00 > %331 = shl i32 %65, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %331) > %333 = shl i32 %65, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %334) > %336 = fsub float -0.000000e+00, %189 > %337 = call float @llvm.fma.f32(float %332, float %335, float %336) > %338 = fmul float %337, %48 > %339 = fmul float %338, 2.000000e+00 > %340 = fmul float %47, %174 > %341 = fmul float %47, %170 > %342 = fmul float %48, %225 > %343 = fmul float %48, %221 > %344 = shl i32 %59, 5 > %345 = or i32 %344, 8 > %346 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %345) > %347 = fmul float %47, %346 > %348 = shl i32 %60, 5 > %349 = or i32 %348, 8 > %350 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %349) > %351 = fmul float %48, %350 > %352 = fadd float %339, %329 > %353 = fadd float %122, %330 > %354 = fadd float %342, %340 > %355 = fadd float %351, %347 > %356 = shl i32 %67, 4 > %357 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %356) > %358 = shl i32 %67, 4 > %359 = or i32 %358, 8 > %360 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %359) > %361 = fsub float -0.000000e+00, %273 > %362 = call float @llvm.fma.f32(float %357, float %360, float %361) > %363 = fmul float %362, %49 > %364 = fmul float %363, 2.000000e+00 > %365 = fmul float %49, %309 > %366 = fmul float %49, %305 > %367 = shl i32 %61, 5 > %368 = or i32 %367, 8 > %369 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %368) > %370 = fmul 
float %49, %369 > %371 = fadd float %352, %364 > %372 = fadd float %353, %257 > %373 = fadd float %354, %365 > %374 = fadd float %355, %370 > %375 = fmul float %371, %41 > %376 = fmul float %372, %42 > %377 = fadd float %375, %376 > %378 = fmul float %373, %43 > %379 = fadd float %377, %378 > %380 = fadd float %379, %374 > %381 = shl i32 %59, 5 > %382 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %381) > %383 = fmul float %47, %382 > %384 = shl i32 %60, 5 > %385 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %384) > %386 = fmul float %48, %385 > %387 = shl i32 %61, 5 > %388 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %387) > %389 = fmul float %49, %388 > %390 = shl i32 %63, 4 > %391 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %390) > %392 = shl i32 %63, 4 > %393 = or i32 %392, 4 > %394 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %393) > %395 = fsub float -0.000000e+00, %136 > %396 = call float @llvm.fma.f32(float %391, float %394, float %395) > %397 = fadd float %138, %137 > %398 = fmul float %396, %47 > %399 = fmul float %397, %47 > %400 = fmul float %398, 2.000000e+00 > %401 = fmul float %399, 2.000000e+00 > %402 = shl i32 %65, 4 > %403 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %402) > %404 = shl i32 %65, 4 > %405 = or i32 %404, 4 > %406 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %405) > %407 = fsub float -0.000000e+00, %187 > %408 = call float @llvm.fma.f32(float %403, float %406, float %407) > %409 = shl i32 %67, 4 > %410 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %409) > %411 = shl i32 %67, 4 > %412 = or i32 %411, 4 > %413 = call float @llvm.SI.load.const(<16 x i8> %37, i32 %412) > %414 = fsub float -0.000000e+00, %271 > %415 = call float @llvm.fma.f32(float %410, float %413, float %414) > %416 = fadd float %273, %272 > %417 = fmul float %408, %48 > %418 = fmul float %415, %49 > %419 = fmul float %416, %49 > %420 = fmul float %418, 2.000000e+00 > %421 = fmul float %419, 2.000000e+00 > %422 = fadd float %189, %188 > %423 = fmul float %422, %48 > %424 = fmul float %417, 2.000000e+00 > %425 = fmul float %423, 2.000000e+00 > %426 = fadd float %341, %343 > %427 = fadd float %400, %424 > %428 = fadd float %401, %425 > %429 = fadd float %383, %386 > %430 = fadd float %366, %426 > %431 = fadd float %420, %427 > %432 = fadd float %421, %428 > %433 = fadd float %389, %429 > %434 = fmul float %430, %41 > %435 = fmul float %431, %42 > %436 = fadd float %434, %435 > %437 = fmul float %432, %43 > %438 = fadd float %436, %437 > %439 = fadd float %438, %433 > %440 = fmul float %27, %439 > %441 = fmul float %28, %320 > %442 = fadd float %440, %441 > %443 = fmul float %29, %380 > %444 = fadd float %442, %443 > %445 = fadd float %444, %30 > %446 = fmul float %445, %35 > %447 = fmul float %19, %439 > %448 = fmul float %20, %320 > %449 = fadd float %447, %448 > %450 = fmul float %21, %380 > %451 = fadd float %449, %450 > %452 = fadd float %451, %22 > %453 = fmul float %23, %439 > %454 = fmul float %24, %320 > %455 = fadd float %453, %454 > %456 = fmul float %25, %380 > %457 = fadd float %455, %456 > %458 = fadd float %457, %26 > %459 = fmul float %31, %439 > %460 = fmul float %32, %320 > %461 = fadd float %459, %460 > %462 = fmul float %33, %380 > %463 = fadd float %461, %462 > %464 = fadd float %463, %34 > %465 = bitcast i32 %11 to float > %466 = insertvalue <{ float, float, float }> undef, float %465, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %452, float %458, float %446, float %464) > ret <{ float, float, 
float }> %466 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..31] >DCL TEMP[0..9], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.5000, 0.0087} >IMM[1] UINT32 {0, 304, 320, 336} >IMM[2] UINT32 {352, 464, 384, 496} >IMM[3] UINT32 {368, 400, 448, 176} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][19], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[3], TEMP[1] > 10: MOV TEMP[4].zw, TEMP[1].wwzw > 11: MUL TEMP[0].xy, TEMP[2].xxxx, CONST[1][29].xyyy > 12: MUL TEMP[2].xy, CONST[1][29].xyyy, IMM[0].xyyy > 13: FMA TEMP[4].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[0].xyyy > 14: MUL TEMP[0].xyz, CONST[1][24].xyzz, CONST[1][31].yyyy > 15: FMA TEMP[0].xyz, CONST[1][23].xyzz, CONST[1][31].xxxx, TEMP[0].xyzz > 16: FMA TEMP[2].xyz, CONST[1][25].xyzz, CONST[1][31].zzzz, TEMP[0].xyzz > 17: MUL TEMP[5].x, TEMP[2].yyyy, IN[1].yyyy > 18: FMA TEMP[5].x, TEMP[2].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 19: FMA TEMP[5].x, -TEMP[2].zzzz, IN[1].zzzz, TEMP[5].xxxx > 20: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 21: DP2 TEMP[6].x, TEMP[2].zxxx, IN[1].ywww > 22: FMA TEMP[6].x, -TEMP[2].yyyy, IN[1].zzzz, TEMP[6].xxxx > 23: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 24: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 25: MUL TEMP[7].x, TEMP[2].zzzz, IN[1].xxxx > 26: FMA TEMP[7].x, TEMP[2].yyyy, IN[1].wwww, -TEMP[7].xxxx > 27: FMA TEMP[7].x, TEMP[2].xxxx, IN[1].zzzz, TEMP[7].xxxx > 28: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 29: DP2 TEMP[8].x, TEMP[2].yzzz, IN[1].xwww > 30: FMA TEMP[2].x, -TEMP[2].xxxx, IN[1].yyyy, TEMP[8].xxxx > 31: FMA TEMP[8].x, -TEMP[2].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 32: FMA TEMP[9].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 33: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 34: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 35: FMA TEMP[6].x, TEMP[7].xxxx, IN[1].wwww, TEMP[9].xxxx > 36: FMA TEMP[6].x, TEMP[2].xxxx, -IN[1].xxxx, TEMP[6].xxxx > 37: MOV TEMP[8].y, TEMP[6].xxxx > 38: FMA TEMP[2].x, TEMP[2].xxxx, IN[1].wwww, TEMP[5].xxxx > 39: MOV TEMP[8].z, TEMP[2].xxxx > 40: DP3 TEMP[0].x, TEMP[8].xyzz, TEMP[8].xyzz > 41: RSQ TEMP[2].x, TEMP[0].xxxx > 42: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[8].xyzz > 43: ADD TEMP[0].xyz, IN[0].xyzz, -CONST[1][28].xyzz > 44: MUL TEMP[5].x, TEMP[0].yyyy, IN[1].yyyy > 45: FMA TEMP[5].x, TEMP[0].xxxx, -IN[1].xxxx, -TEMP[5].xxxx > 46: FMA TEMP[5].x, -TEMP[0].zzzz, IN[1].zzzz, TEMP[5].xxxx > 47: MUL TEMP[1].x, TEMP[5].xxxx, -IN[1].xxxx > 48: DP2 TEMP[6].x, TEMP[0].zxxx, IN[1].ywww > 49: FMA TEMP[6].x, -TEMP[0].yyyy, IN[1].zzzz, TEMP[6].xxxx 
> 50: FMA TEMP[1].x, TEMP[6].xxxx, IN[1].wwww, TEMP[1].xxxx > 51: MUL TEMP[6].xy, TEMP[6].xxxx, -IN[1].zyyy > 52: MUL TEMP[7].x, TEMP[0].zzzz, IN[1].xxxx > 53: FMA TEMP[7].x, TEMP[0].yyyy, IN[1].wwww, -TEMP[7].xxxx > 54: FMA TEMP[7].x, TEMP[0].xxxx, IN[1].zzzz, TEMP[7].xxxx > 55: FMA TEMP[1].x, TEMP[7].xxxx, -IN[1].zzzz, TEMP[1].xxxx > 56: DP2 TEMP[9].x, TEMP[0].yzzz, IN[1].xwww > 57: FMA TEMP[0].x, -TEMP[0].xxxx, IN[1].yyyy, TEMP[9].xxxx > 58: FMA TEMP[8].x, -TEMP[0].xxxx, -IN[1].yyyy, TEMP[1].xxxx > 59: FMA TEMP[1].x, TEMP[5].xxxx, -IN[1].yyyy, -TEMP[6].xxxx > 60: FMA TEMP[5].x, TEMP[5].xxxx, -IN[1].zzzz, TEMP[6].yyyy > 61: FMA TEMP[5].x, -TEMP[7].xxxx, -IN[1].xxxx, TEMP[5].xxxx > 62: FMA TEMP[1].x, TEMP[7].xxxx, IN[1].wwww, TEMP[1].xxxx > 63: FMA TEMP[1].x, TEMP[0].xxxx, -IN[1].xxxx, TEMP[1].xxxx > 64: MOV TEMP[8].y, TEMP[1].xxxx > 65: FMA TEMP[0].x, TEMP[0].xxxx, IN[1].wwww, TEMP[5].xxxx > 66: MOV TEMP[8].z, TEMP[0].xxxx > 67: MOV TEMP[0].xyz, TEMP[8].xyzx > 68: MOV TEMP[1].xyz, IN[2].xyzx > 69: MOV TEMP[5].xyz, IN[3].xyzx > 70: MOV OUT[5], TEMP[5] > 71: MOV OUT[4], TEMP[1] > 72: MOV OUT[3], TEMP[0] > 73: MOV OUT[2], TEMP[2] > 74: MOV OUT[1], TEMP[4] > 75: MOV OUT[0], TEMP[3] > 76: END >radeonsi: Compiling shader 345 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 316) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 332) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 348) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 352) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 356) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 360) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) > %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 368) > %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) > %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 376) > %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 384) > %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 388) > %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 392) > %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 400) > %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 404) > %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) > %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 448) > %45 = call float @llvm.SI.load.const(<16 x i8> 
%18, i32 452) > %46 = call float @llvm.SI.load.const(<16 x i8> %18, i32 456) > %47 = call float @llvm.SI.load.const(<16 x i8> %18, i32 464) > %48 = call float @llvm.SI.load.const(<16 x i8> %18, i32 468) > %49 = call float @llvm.SI.load.const(<16 x i8> %18, i32 496) > %50 = call float @llvm.SI.load.const(<16 x i8> %18, i32 500) > %51 = call float @llvm.SI.load.const(<16 x i8> %18, i32 504) > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %13) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %14) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = extractelement <4 x float> %60, i32 2 > %64 = extractelement <4 x float> %60, i32 3 > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %15) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %16) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = fmul float %19, %55 > %78 = fmul float %20, %56 > %79 = fadd float %77, %78 > %80 = fmul float %21, %57 > %81 = fadd float %79, %80 > %82 = fadd float %81, %22 > %83 = fmul float %23, %55 > %84 = fmul float %24, %56 > %85 = fadd float %83, %84 > %86 = fmul float %25, %57 > %87 = fadd float %85, %86 > %88 = fadd float %87, %26 > %89 = fmul float %27, %55 > %90 = fmul float %28, %56 > %91 = fadd float %89, %90 > %92 = fmul float %29, %57 > %93 = fadd float %91, %92 > %94 = fadd float %93, %30 > %95 = fmul float %31, %55 > %96 = fmul float %32, %56 > %97 = fadd float %95, %96 > %98 = fmul float %33, %57 > %99 = fadd float %97, %98 > %100 = fadd float %99, %34 > %101 = fmul float %100, %47 > %102 = fmul float %100, %48 > %103 = fsub float -0.000000e+00, %48 > %104 = call float @llvm.fma.f32(float %82, float %47, float %101) > %105 = call float @llvm.fma.f32(float %88, float %103, float %102) > %106 = fmul float %38, %50 > %107 = fmul float %39, %50 > %108 = fmul float %40, %50 > %109 = call float @llvm.fma.f32(float %35, float %49, float %106) > %110 = call float @llvm.fma.f32(float %36, float %49, float %107) > %111 = call float @llvm.fma.f32(float %37, float %49, float %108) > %112 = call float @llvm.fma.f32(float %41, float %51, float %109) > %113 = call float @llvm.fma.f32(float %42, float %51, float %110) > %114 = call float @llvm.fma.f32(float %43, float %51, float %111) > %115 = fmul float %113, %62 > %116 = fsub float -0.000000e+00, %61 > %117 = fsub float -0.000000e+00, %115 > %118 = call float @llvm.fma.f32(float %112, float %116, float %117) > %119 = fsub float 
-0.000000e+00, %114 > %120 = call float @llvm.fma.f32(float %119, float %63, float %118) > %121 = fmul float %61, %120 > %122 = fsub float -0.000000e+00, %121 > %123 = fmul float %114, %62 > %124 = fmul float %112, %64 > %125 = fadd float %123, %124 > %126 = fsub float -0.000000e+00, %113 > %127 = call float @llvm.fma.f32(float %126, float %63, float %125) > %128 = call float @llvm.fma.f32(float %127, float %64, float %122) > %129 = fmul float %63, %127 > %130 = fmul float %62, %127 > %131 = fsub float -0.000000e+00, %130 > %132 = fmul float %114, %61 > %133 = fsub float -0.000000e+00, %132 > %134 = call float @llvm.fma.f32(float %113, float %64, float %133) > %135 = call float @llvm.fma.f32(float %112, float %63, float %134) > %136 = fsub float -0.000000e+00, %63 > %137 = call float @llvm.fma.f32(float %135, float %136, float %128) > %138 = fmul float %113, %61 > %139 = fmul float %114, %64 > %140 = fadd float %138, %139 > %141 = fsub float -0.000000e+00, %112 > %142 = call float @llvm.fma.f32(float %141, float %62, float %140) > %143 = fsub float -0.000000e+00, %142 > %144 = fsub float -0.000000e+00, %62 > %145 = call float @llvm.fma.f32(float %143, float %144, float %137) > %146 = fsub float -0.000000e+00, %62 > %147 = call float @llvm.fma.f32(float %120, float %146, float %129) > %148 = fsub float -0.000000e+00, %63 > %149 = call float @llvm.fma.f32(float %120, float %148, float %131) > %150 = fsub float -0.000000e+00, %135 > %151 = fsub float -0.000000e+00, %61 > %152 = call float @llvm.fma.f32(float %150, float %151, float %149) > %153 = call float @llvm.fma.f32(float %135, float %64, float %147) > %154 = fsub float -0.000000e+00, %61 > %155 = call float @llvm.fma.f32(float %142, float %154, float %153) > %156 = call float @llvm.fma.f32(float %142, float %64, float %152) > %157 = fmul float %145, %145 > %158 = fmul float %155, %155 > %159 = fadd float %158, %157 > %160 = fmul float %156, %156 > %161 = fadd float %159, %160 > %162 = call float @llvm.AMDGPU.rsq.clamped.f32(float %161) > %163 = fmul float %162, %145 > %164 = fmul float %162, %155 > %165 = fmul float %162, %156 > %166 = fsub float %55, %44 > %167 = fsub float %56, %45 > %168 = fsub float %57, %46 > %169 = fmul float %167, %62 > %170 = fsub float -0.000000e+00, %61 > %171 = fsub float -0.000000e+00, %169 > %172 = call float @llvm.fma.f32(float %166, float %170, float %171) > %173 = fsub float -0.000000e+00, %168 > %174 = call float @llvm.fma.f32(float %173, float %63, float %172) > %175 = fmul float %61, %174 > %176 = fsub float -0.000000e+00, %175 > %177 = fmul float %168, %62 > %178 = fmul float %166, %64 > %179 = fadd float %177, %178 > %180 = fsub float -0.000000e+00, %167 > %181 = call float @llvm.fma.f32(float %180, float %63, float %179) > %182 = call float @llvm.fma.f32(float %181, float %64, float %176) > %183 = fmul float %63, %181 > %184 = fmul float %62, %181 > %185 = fsub float -0.000000e+00, %184 > %186 = fmul float %168, %61 > %187 = fsub float -0.000000e+00, %186 > %188 = call float @llvm.fma.f32(float %167, float %64, float %187) > %189 = call float @llvm.fma.f32(float %166, float %63, float %188) > %190 = fsub float -0.000000e+00, %63 > %191 = call float @llvm.fma.f32(float %189, float %190, float %182) > %192 = fmul float %167, %61 > %193 = fmul float %168, %64 > %194 = fadd float %192, %193 > %195 = fsub float -0.000000e+00, %166 > %196 = call float @llvm.fma.f32(float %195, float %62, float %194) > %197 = fsub float -0.000000e+00, %196 > %198 = fsub float -0.000000e+00, %62 > %199 = call float 
@llvm.fma.f32(float %197, float %198, float %191) > %200 = fsub float -0.000000e+00, %62 > %201 = call float @llvm.fma.f32(float %174, float %200, float %183) > %202 = fsub float -0.000000e+00, %63 > %203 = call float @llvm.fma.f32(float %174, float %202, float %185) > %204 = fsub float -0.000000e+00, %189 > %205 = fsub float -0.000000e+00, %61 > %206 = call float @llvm.fma.f32(float %204, float %205, float %203) > %207 = call float @llvm.fma.f32(float %189, float %64, float %201) > %208 = fsub float -0.000000e+00, %61 > %209 = call float @llvm.fma.f32(float %196, float %208, float %207) > %210 = call float @llvm.fma.f32(float %196, float %64, float %206) > %211 = bitcast i32 %11 to float > %212 = insertvalue <{ float, float, float }> undef, float %211, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %104, float %105, float %94, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %163, float %164, float %165, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %199, float %209, float %210, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %68, float %69, float %70, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %74, float %75, float %76, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %88, float %94, float %100) > ret <{ float, float, float }> %212 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL SVIEW[5], 3D, FLOAT >DCL CONST[1][0..30] >DCL TEMP[0..14], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.5000} >IMM[1] UINT32 {0, 480, 176, 288} >IMM[2] INT32 {1, 0, 0, 0} >IMM[3] UINT32 {208, 192, 432, 416} >IMM[4] FLT32 { -1.0000, -1.1000, 0.0000, 0.0000} > 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz > 1: SQRT TEMP[1].x, TEMP[0].xxxx > 2: MOV TEMP[0].x, TEMP[1].xxxx > 3: FSEQ TEMP[2].xyz, TEMP[1].xxxx, IMM[0].xxxx > 4: SSG TEMP[3].xyz, IN[2].xyzz > 5: MUL TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz > 6: RCP TEMP[4].xyz, TEMP[1].xxxx > 7: MUL TEMP[4].xyz, IN[2].xyzz, TEMP[4].xyzz > 8: UCMP TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[4].xyzz > 9: DP3 TEMP[3].x, TEMP[2].xyzz, IN[1].xyzz > 10: FSNE TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx > 11: UIF TEMP[4].xxxx 
:0 > 12: RCP TEMP[4].x, TEMP[3].xxxx > 13: ELSE :0 > 14: MOV TEMP[4].x, IMM[0].yyyy > 15: ENDIF > 16: MUL TEMP[5].x, TEMP[4].xxxx, CONST[1][30].xxxx > 17: FSEQ TEMP[6].xyz, TEMP[2].xyzz, IMM[0].xxxx > 18: SSG TEMP[7].xyz, IN[3].xyzz > 19: MUL TEMP[7].xyz, IMM[0].yyyy, TEMP[7].xyzz > 20: RCP TEMP[8].x, TEMP[2].xxxx > 21: RCP TEMP[8].y, TEMP[2].yyyy > 22: RCP TEMP[8].z, TEMP[2].zzzz > 23: MUL TEMP[8].xyz, IN[3].xyzz, TEMP[8].xyzz > 24: UCMP TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[8].xyzz > 25: FSEQ TEMP[7].xyz, TEMP[2].xyzz, IMM[0].xxxx > 26: SSG TEMP[8].xyz, IN[4].xyzz > 27: MUL TEMP[8].xyz, IMM[0].yyyy, TEMP[8].xyzz > 28: RCP TEMP[9].x, TEMP[2].xxxx > 29: RCP TEMP[9].y, TEMP[2].yyyy > 30: RCP TEMP[9].z, TEMP[2].zzzz > 31: MUL TEMP[9].xyz, IN[4].xyzz, TEMP[9].xyzz > 32: UCMP TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[9].xyzz > 33: MIN TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz > 34: MAX TEMP[8].x, TEMP[6].zzzz, TEMP[6].yyyy > 35: MAX TEMP[8].x, TEMP[8].xxxx, TEMP[6].xxxx > 36: ADD TEMP[8].x, TEMP[1].xxxx, TEMP[8].xxxx > 37: FSEQ TEMP[9].xy, IN[0].wwww, IMM[0].xxxx > 38: SSG TEMP[10].xy, IN[0].xyyy > 39: MUL TEMP[10].xy, IMM[0].yyyy, TEMP[10].xyyy > 40: RCP TEMP[11].xy, IN[0].wwww > 41: MUL TEMP[11].xy, IN[0].xyyy, TEMP[11].xyyy > 42: UCMP TEMP[6].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[11].xyyy > 43: MOV TEMP[9].xy, TEMP[6].xyyy > 44: TEX TEMP[9].x, TEMP[9], SAMP[0], 2D > 45: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[9].xxxx > 46: MIN TEMP[9].x, TEMP[1].xxxx, TEMP[9].xxxx > 47: MAX TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx > 48: ADD TEMP[8].x, -TEMP[5].xxxx, TEMP[9].xxxx > 49: MAX TEMP[8].x, TEMP[8].xxxx, IMM[0].xxxx > 50: ADD TEMP[7].xyz, IN[3].xyzz, -IN[4].xyzz > 51: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[7].xyzz > 52: SQRT TEMP[7].x, TEMP[7].xxxx > 53: FSNE TEMP[10].x, TEMP[7].xxxx, IMM[0].xxxx > 54: UIF TEMP[10].xxxx :0 > 55: RCP TEMP[10].x, TEMP[7].xxxx > 56: MUL TEMP[10].x, TEMP[8].xxxx, TEMP[10].xxxx > 57: ELSE :0 > 58: SSG TEMP[8].x, TEMP[8].xxxx > 59: MUL TEMP[10].x, IMM[0].yyyy, TEMP[8].xxxx > 60: ENDIF > 61: ADD TEMP[8].x, -TEMP[10].xxxx, IMM[0].zzzz > 62: ABS TEMP[8].x, TEMP[8].xxxx > 63: LG2 TEMP[8].x, TEMP[8].xxxx > 64: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][11].zzzz > 65: EX2 TEMP[8].x, TEMP[8].xxxx > 66: ADD TEMP[8].x, -TEMP[8].xxxx, IMM[0].zzzz > 67: MOV TEMP[3].z, TEMP[8].xxxx > 68: ADD TEMP[10].x, TEMP[1].xxxx, -TEMP[5].xxxx > 69: FMA TEMP[10].xyz, -TEMP[2].xyzz, TEMP[10].xxxx, -IN[3].xyzz > 70: ADD TEMP[9].x, TEMP[1].xxxx, -TEMP[9].xxxx > 71: FMA TEMP[2].xyz, -TEMP[2].xyzz, TEMP[9].xxxx, -IN[3].xyzz > 72: ADD TEMP[9].xyz, -IN[3].xyzz, IN[4].xyzz > 73: FSEQ TEMP[11].xyz, TEMP[9].xyzz, IMM[0].xxxx > 74: RCP TEMP[12].x, TEMP[9].xxxx > 75: RCP TEMP[12].y, TEMP[9].yyyy > 76: RCP TEMP[12].z, TEMP[9].zzzz > 77: UCMP TEMP[11].xyz, TEMP[11].xyzz, IMM[0].yyyy, TEMP[12].xyzz > 78: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[11].xyzz > 79: MUL TEMP[9].xyz, TEMP[2].xyzz, TEMP[11].xyzz > 80: USNE TEMP[2].x, CONST[1][18].xxxx, IMM[1].xxxx > 81: UIF TEMP[2].xxxx :0 > 82: MOV TEMP[2].xy, TEMP[6].xyyy > 83: MOV TEMP[2].w, IMM[0].xxxx > 84: TXL TEMP[2].x, TEMP[2], SAMP[1], 2D > 85: FSLT TEMP[11].x, TEMP[2].xxxx, IMM[0].zzzz > 86: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].xxxx > 87: INEG TEMP[11].x, TEMP[11].xxxx > 88: USNE TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx > 89: UIF TEMP[11].xxxx :0 > 90: MOV TEMP[11].xy, TEMP[6].xyyy > 91: MOV TEMP[11].w, IMM[0].xxxx > 92: TXL TEMP[11].x, TEMP[11], SAMP[2], 2D > 93: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[11].xxxx > 94: MIN TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx > 95: ADD 
TEMP[0].x, -TEMP[5].xxxx, TEMP[1].xxxx > 96: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx > 97: FSNE TEMP[4].x, TEMP[7].xxxx, IMM[0].xxxx > 98: UIF TEMP[4].xxxx :0 > 99: RCP TEMP[4].x, TEMP[7].xxxx >100: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx >101: ELSE :0 >102: SSG TEMP[1].x, TEMP[1].xxxx >103: MUL TEMP[4].x, IMM[0].yyyy, TEMP[1].xxxx >104: ENDIF >105: ADD TEMP[0].x, -TEMP[4].xxxx, IMM[0].zzzz >106: ABS TEMP[1].x, TEMP[0].xxxx >107: LG2 TEMP[1].x, TEMP[1].xxxx >108: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][11].zzzz >109: EX2 TEMP[1].x, TEMP[0].xxxx >110: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz >111: SQRT TEMP[1].x, TEMP[2].xxxx >112: ADD TEMP[2].x, -TEMP[0].xxxx, TEMP[8].xxxx >113: FMA TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[0].xxxx >114: MOV TEMP[3].z, TEMP[1].xxxx >115: ENDIF >116: ENDIF >117: SQRT TEMP[1].x, CONST[1][13].yyyy >118: SQRT TEMP[1].y, CONST[1][13].zzzz >119: FSEQ TEMP[2].xy, TEMP[1].xyyy, IMM[0].xxxx >120: RCP TEMP[4].x, TEMP[1].xxxx >121: RCP TEMP[4].y, TEMP[1].yyyy >122: UCMP TEMP[2].xy, TEMP[2].xyyy, IMM[0].yyyy, TEMP[4].xyyy >123: MOV TEMP[6].xy, TEMP[2].xyxx >124: ADD TEMP[4], TEMP[1].xyxy, IMM[4].xxyy >125: MOV TEMP[12].w, TEMP[9].xxxx >126: MUL TEMP[5].xy, TEMP[12].xwww, TEMP[4].xyyy >127: MAX TEMP[5].xy, TEMP[5].xyyy, IMM[0].xxxx >128: MIN TEMP[5].xy, TEMP[4].zwww, TEMP[5].xyyy >129: FRC TEMP[7].xy, TEMP[5].xyyy >130: ADD TEMP[5].xy, TEMP[5].xyyy, -TEMP[7].xyyy >131: MOV TEMP[9].w, TEMP[12].zzzz >132: MUL TEMP[8].xy, TEMP[9].wzzz, TEMP[4].xyyy >133: FSEQ TEMP[11].xy, CONST[1][13].yzzz, IMM[0].xxxx >134: SSG TEMP[13].xy, TEMP[8].xyyy >135: MUL TEMP[13].xy, IMM[0].yyyy, TEMP[13].xyyy >136: RCP TEMP[14].x, CONST[1][13].yyyy >137: RCP TEMP[14].y, CONST[1][13].zzzz >138: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[14].xyyy >139: UCMP TEMP[8].xy, TEMP[11].xyyy, TEMP[13].xyyy, TEMP[8].xyyy >140: FSEQ TEMP[11].xy, CONST[1][13].yzzz, IMM[0].xxxx >141: RCP TEMP[13].x, CONST[1][13].yyyy >142: RCP TEMP[13].y, CONST[1][13].zzzz >143: MUL TEMP[13].xy, IMM[0].wwww, TEMP[13].xyyy >144: UCMP TEMP[11].xy, TEMP[11].xyyy, IMM[0].yyyy, TEMP[13].xyyy >145: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[11].xyyy >146: FSEQ TEMP[11].xy, TEMP[1].xyyy, IMM[0].xxxx >147: SSG TEMP[13].xy, TEMP[5].xyyy >148: MUL TEMP[13].xy, IMM[0].yyyy, TEMP[13].xyyy >149: RCP TEMP[14].x, TEMP[1].xxxx >150: RCP TEMP[14].y, TEMP[1].yyyy >151: MUL TEMP[1].xy, TEMP[5].xyyy, TEMP[14].xyyy >152: UCMP TEMP[1].xy, TEMP[11].xyyy, TEMP[13].xyyy, TEMP[1].xyyy >153: ADD TEMP[0].xy, TEMP[8].xyyy, TEMP[1].xyyy >154: MOV TEMP[6].z, IMM[0].xxxx >155: ADD TEMP[4], TEMP[6].xzzy, TEMP[0].xyxy >156: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[0].xyyy >157: MOV TEMP[2].xy, TEMP[0].xyyy >158: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D >159: MOV TEMP[5].xy, TEMP[4].xyyy >160: TEX TEMP[5].xyz, TEMP[5], SAMP[3], 2D >161: MOV TEMP[4].xy, TEMP[4].zwww >162: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D >163: ADD TEMP[5].xyz, -TEMP[2].xyzz, TEMP[5].xyzz >164: FMA TEMP[2].xyz, TEMP[7].xxxx, TEMP[5].xyzz, TEMP[2].xyzz >165: MOV TEMP[1].xy, TEMP[1].xyyy >166: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D >167: ADD TEMP[0].xyz, -TEMP[4].xyzz, TEMP[1].xyzz >168: FMA TEMP[1].xyz, TEMP[7].xxxx, TEMP[0].xyzz, TEMP[4].xyzz >169: ADD TEMP[0].xyz, -TEMP[2].xyzz, TEMP[1].xyzz >170: FMA TEMP[0].xyz, TEMP[7].yyyy, TEMP[0].xyzz, TEMP[2].xyzz >171: MOV TEMP[9].x, TEMP[12].yyyy >172: MOV TEMP[1].xy, TEMP[9].xyyy >173: TEX TEMP[1].y, TEMP[1], SAMP[4], 2D >174: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[3].zzzz >175: MUL TEMP[3].xyz, CONST[1][12].yzww, CONST[1][18].yyyy >176: FMA TEMP[3].xyz, TEMP[10].xyzz, 
CONST[1][13].xxxx, TEMP[3].xyzz >177: MOV TEMP[2].xyz, TEMP[3].xyzz >178: TEX TEMP[2].y, TEMP[2], SAMP[5], 3D >179: FMA TEMP[2].x, -CONST[1][12].xxxx, TEMP[2].yyyy, IMM[0].zzzz >180: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx >181: MOV TEMP[0].w, TEMP[1].xxxx >182: MUL TEMP[0], TEMP[0], CONST[1][27] >183: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz >184: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][26].xyzz >185: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][11].wwww >186: MOV TEMP[1].w, TEMP[1].xxxx >187: MOV TEMP[1].xyz, TEMP[0].xyzx >188: MOV OUT[0], TEMP[1] >189: END >radeonsi: Compiling shader 346 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 184) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 188) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 200) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 208) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 212) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 216) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 424) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 432) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 436) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 440) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 444) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) > %44 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 > %46 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %47 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %46, i64 0, i64 3 > %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 > %49 = extractelement <8 x i32> %45, i32 7 > %50 = extractelement <4 x i32> %48, i32 0 > %51 = and i32 %50, %49 > %52 = insertelement <4 x i32> %48, i32 %51, i32 0 > %53 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 > %55 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %55, i64 0, i64 7 > %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 
16, !tbaa !0 > %58 = extractelement <8 x i32> %54, i32 7 > %59 = extractelement <4 x i32> %57, i32 0 > %60 = and i32 %59, %58 > %61 = insertelement <4 x i32> %57, i32 %60, i32 0 > %62 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 > %64 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %65 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %64, i64 0, i64 11 > %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 > %67 = extractelement <8 x i32> %63, i32 7 > %68 = extractelement <4 x i32> %66, i32 0 > %69 = and i32 %68, %67 > %70 = insertelement <4 x i32> %66, i32 %69, i32 0 > %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 > %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 15 > %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 > %76 = extractelement <8 x i32> %72, i32 7 > %77 = extractelement <4 x i32> %75, i32 0 > %78 = and i32 %77, %76 > %79 = insertelement <4 x i32> %75, i32 %78, i32 0 > %80 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 > %82 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %83 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %82, i64 0, i64 19 > %84 = load <4 x i32>, <4 x i32> addrspace(2)* %83, align 16, !tbaa !0 > %85 = extractelement <8 x i32> %81, i32 7 > %86 = extractelement <4 x i32> %84, i32 0 > %87 = and i32 %86, %85 > %88 = insertelement <4 x i32> %84, i32 %87, i32 0 > %89 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %90 = load <8 x i32>, <8 x i32> addrspace(2)* %89, align 32, !tbaa !0 > %91 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %92 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %91, i64 0, i64 23 > %93 = load <4 x i32>, <4 x i32> addrspace(2)* %92, align 16, !tbaa !0 > %94 = extractelement <8 x i32> %90, i32 7 > %95 = extractelement <4 x i32> %93, i32 0 > %96 = and i32 %95, %94 > %97 = insertelement <4 x i32> %93, i32 %96, i32 0 > %98 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %102 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %103 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %104 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %105 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %106 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %107 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %108 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %109 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %110 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %111 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %112 = call float @llvm.SI.fs.interp(i32 2, i32 4, 
i32 %6, <2 x i32> %8) > %113 = fmul float %104, %104 > %114 = fmul float %105, %105 > %115 = fadd float %114, %113 > %116 = fmul float %106, %106 > %117 = fadd float %115, %116 > %118 = call float @llvm.sqrt.f32(float %117) > %119 = fcmp oeq float %118, 0.000000e+00 > %120 = fcmp oeq float %118, 0.000000e+00 > %121 = fcmp oeq float %118, 0.000000e+00 > %122 = fcmp ogt float %104, 0.000000e+00 > %123 = select i1 %122, float 1.000000e+00, float %104 > %124 = fcmp oge float %123, 0.000000e+00 > %125 = fcmp ogt float %105, 0.000000e+00 > %126 = select i1 %125, float 1.000000e+00, float %105 > %127 = fcmp oge float %126, 0.000000e+00 > %128 = fcmp ogt float %106, 0.000000e+00 > %129 = select i1 %128, float 1.000000e+00, float %106 > %130 = fcmp oge float %129, 0.000000e+00 > %.op = fmul float %123, 0x4600000000000000 > %131 = select i1 %124, float %.op, float 0xC600000000000000 > %.op72 = fmul float %126, 0x4600000000000000 > %132 = select i1 %127, float %.op72, float 0xC600000000000000 > %.op73 = fmul float %129, 0x4600000000000000 > %133 = select i1 %130, float %.op73, float 0xC600000000000000 > %134 = fdiv float 1.000000e+00, %118 > %135 = fmul float %104, %134 > %136 = fmul float %105, %134 > %137 = fmul float %106, %134 > %138 = select i1 %119, float %131, float %135 > %139 = select i1 %120, float %132, float %136 > %140 = select i1 %121, float %133, float %137 > %141 = fmul float %138, %101 > %142 = fmul float %139, %102 > %143 = fadd float %142, %141 > %144 = fmul float %140, %103 > %145 = fadd float %143, %144 > %146 = fcmp une float %145, 0.000000e+00 > %147 = fdiv float 1.000000e+00, %145 > %temp16.0 = select i1 %146, float %147, float 0x4600000000000000 > %148 = fmul float %temp16.0, %43 > %149 = fcmp oeq float %138, 0.000000e+00 > %150 = fcmp oeq float %139, 0.000000e+00 > %151 = fcmp oeq float %140, 0.000000e+00 > %152 = fcmp ogt float %107, 0.000000e+00 > %153 = select i1 %152, float 1.000000e+00, float %107 > %154 = fcmp oge float %153, 0.000000e+00 > %155 = fcmp ogt float %108, 0.000000e+00 > %156 = select i1 %155, float 1.000000e+00, float %108 > %157 = fcmp oge float %156, 0.000000e+00 > %158 = fcmp ogt float %109, 0.000000e+00 > %159 = select i1 %158, float 1.000000e+00, float %109 > %160 = fcmp oge float %159, 0.000000e+00 > %.op74 = fmul float %153, 0x4600000000000000 > %161 = select i1 %154, float %.op74, float 0xC600000000000000 > %.op75 = fmul float %156, 0x4600000000000000 > %162 = select i1 %157, float %.op75, float 0xC600000000000000 > %.op76 = fmul float %159, 0x4600000000000000 > %163 = select i1 %160, float %.op76, float 0xC600000000000000 > %164 = fdiv float 1.000000e+00, %138 > %165 = fdiv float 1.000000e+00, %139 > %166 = fdiv float 1.000000e+00, %140 > %167 = fmul float %107, %164 > %168 = fmul float %108, %165 > %169 = fmul float %109, %166 > %170 = select i1 %149, float %161, float %167 > %171 = select i1 %150, float %162, float %168 > %172 = select i1 %151, float %163, float %169 > %173 = fcmp oeq float %138, 0.000000e+00 > %174 = fcmp oeq float %139, 0.000000e+00 > %175 = fcmp oeq float %140, 0.000000e+00 > %176 = fcmp ogt float %110, 0.000000e+00 > %177 = select i1 %176, float 1.000000e+00, float %110 > %178 = fcmp oge float %177, 0.000000e+00 > %179 = fcmp ogt float %111, 0.000000e+00 > %180 = select i1 %179, float 1.000000e+00, float %111 > %181 = fcmp oge float %180, 0.000000e+00 > %182 = fcmp ogt float %112, 0.000000e+00 > %183 = select i1 %182, float 1.000000e+00, float %112 > %184 = fcmp oge float %183, 0.000000e+00 > %.op77 = fmul float %177, 
0x4600000000000000 > %185 = select i1 %178, float %.op77, float 0xC600000000000000 > %.op78 = fmul float %180, 0x4600000000000000 > %186 = select i1 %181, float %.op78, float 0xC600000000000000 > %.op79 = fmul float %183, 0x4600000000000000 > %187 = select i1 %184, float %.op79, float 0xC600000000000000 > %188 = fdiv float 1.000000e+00, %138 > %189 = fdiv float 1.000000e+00, %139 > %190 = fdiv float 1.000000e+00, %140 > %191 = fmul float %110, %188 > %192 = fmul float %111, %189 > %193 = fmul float %112, %190 > %194 = select i1 %173, float %185, float %191 > %195 = select i1 %174, float %186, float %192 > %196 = select i1 %175, float %187, float %193 > %197 = call float @llvm.minnum.f32(float %170, float %194) > %198 = call float @llvm.minnum.f32(float %171, float %195) > %199 = call float @llvm.minnum.f32(float %172, float %196) > %200 = call float @llvm.maxnum.f32(float %199, float %198) > %201 = call float @llvm.maxnum.f32(float %200, float %197) > %202 = fadd float %118, %201 > %203 = fcmp oeq float %100, 0.000000e+00 > %204 = fcmp oeq float %100, 0.000000e+00 > %205 = fcmp ogt float %98, 0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %98 > %207 = fcmp oge float %206, 0.000000e+00 > %208 = fcmp ogt float %99, 0.000000e+00 > %209 = select i1 %208, float 1.000000e+00, float %99 > %210 = fcmp oge float %209, 0.000000e+00 > %.op80 = fmul float %206, 0x4600000000000000 > %211 = select i1 %207, float %.op80, float 0xC600000000000000 > %.op81 = fmul float %209, 0x4600000000000000 > %212 = select i1 %210, float %.op81, float 0xC600000000000000 > %213 = fdiv float 1.000000e+00, %100 > %214 = fmul float %98, %213 > %215 = fmul float %99, %213 > %216 = select i1 %203, float %211, float %214 > %217 = select i1 %204, float %212, float %215 > %218 = bitcast float %216 to i32 > %219 = bitcast float %217 to i32 > %220 = insertelement <2 x i32> undef, i32 %218, i32 0 > %221 = insertelement <2 x i32> %220, i32 %219, i32 1 > %222 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %221, <8 x i32> %45, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %223 = extractelement <4 x float> %222, i32 0 > %224 = fmul float %temp16.0, %223 > %225 = call float @llvm.minnum.f32(float %118, float %224) > %226 = call float @llvm.maxnum.f32(float %148, float %202) > %227 = fsub float %225, %226 > %228 = call float @llvm.maxnum.f32(float %227, float 0.000000e+00) > %229 = fsub float %107, %110 > %230 = fsub float %108, %111 > %231 = fsub float %109, %112 > %232 = fmul float %229, %229 > %233 = fmul float %230, %230 > %234 = fadd float %233, %232 > %235 = fmul float %231, %231 > %236 = fadd float %234, %235 > %237 = call float @llvm.sqrt.f32(float %236) > %238 = fcmp une float %237, 0.000000e+00 > br i1 %238, label %IF61, label %ELSE62 > >IF61: ; preds = %main_body > %239 = fdiv float 1.000000e+00, %237 > %240 = fmul float %228, %239 > br label %ENDIF60 > >ELSE62: ; preds = %main_body > %241 = fcmp ogt float %228, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %228 > %243 = fcmp oge float %242, 0.000000e+00 > %.op82 = fmul float %242, 0x4600000000000000 > %244 = select i1 %243, float %.op82, float 0xC600000000000000 > br label %ENDIF60 > >ENDIF60: ; preds = %ELSE62, %IF61 > %temp40.0 = phi float [ %240, %IF61 ], [ %244, %ELSE62 ] > %245 = fsub float 1.000000e+00, %temp40.0 > %246 = call float @llvm.fabs.f32(float %245) > %247 = call float @llvm.log2.f32(float %246) > %248 = fmul float %247, %25 > %249 = call float @llvm.exp2.f32(float %248) > %250 = fsub 
float 1.000000e+00, %249 > %251 = fsub float %118, %226 > %252 = fsub float -0.000000e+00, %138 > %253 = fsub float -0.000000e+00, %107 > %254 = call float @llvm.fma.f32(float %252, float %251, float %253) > %255 = fsub float -0.000000e+00, %139 > %256 = fsub float -0.000000e+00, %108 > %257 = call float @llvm.fma.f32(float %255, float %251, float %256) > %258 = fsub float -0.000000e+00, %140 > %259 = fsub float -0.000000e+00, %109 > %260 = call float @llvm.fma.f32(float %258, float %251, float %259) > %261 = fsub float %118, %225 > %262 = fsub float -0.000000e+00, %138 > %263 = fsub float -0.000000e+00, %107 > %264 = call float @llvm.fma.f32(float %262, float %261, float %263) > %265 = fsub float -0.000000e+00, %139 > %266 = fsub float -0.000000e+00, %108 > %267 = call float @llvm.fma.f32(float %265, float %261, float %266) > %268 = fsub float -0.000000e+00, %140 > %269 = fsub float -0.000000e+00, %109 > %270 = call float @llvm.fma.f32(float %268, float %261, float %269) > %271 = fsub float %110, %107 > %272 = fsub float %111, %108 > %273 = fsub float %112, %109 > %274 = fcmp oeq float %271, 0.000000e+00 > %275 = fcmp oeq float %272, 0.000000e+00 > %276 = fcmp oeq float %273, 0.000000e+00 > %277 = fdiv float 1.000000e+00, %271 > %278 = fdiv float 1.000000e+00, %272 > %279 = fdiv float 1.000000e+00, %273 > %280 = select i1 %274, float 0x4600000000000000, float %277 > %281 = select i1 %275, float 0x4600000000000000, float %278 > %282 = select i1 %276, float 0x4600000000000000, float %279 > %283 = fmul float %254, %280 > %284 = fmul float %257, %281 > %285 = fmul float %260, %282 > %286 = fmul float %264, %280 > %287 = fmul float %267, %281 > %288 = fmul float %270, %282 > %289 = bitcast float %34 to i32 > %290 = icmp eq i32 %289, 0 > br i1 %290, label %ENDIF63, label %IF64 > >IF64: ; preds = %ENDIF60 > %291 = bitcast float %216 to i32 > %292 = bitcast float %217 to i32 > %293 = insertelement <4 x i32> undef, i32 %291, i32 0 > %294 = insertelement <4 x i32> %293, i32 %292, i32 1 > %295 = insertelement <4 x i32> %294, i32 0, i32 2 > %296 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %295, <8 x i32> %54, <4 x i32> %61, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %297 = extractelement <4 x float> %296, i32 0 > %298 = fcmp olt float %297, 1.000000e+00 > br i1 %298, label %IF67, label %ENDIF63 > >ENDIF63: ; preds = %ENDIF60, %ENDIF69, %IF64 > %temp14.0 = phi float [ %250, %ENDIF60 ], [ %495, %ENDIF69 ], [ %250, %IF64 ] > %299 = call float @llvm.sqrt.f32(float %32) > %300 = call float @llvm.sqrt.f32(float %33) > %301 = fcmp oeq float %299, 0.000000e+00 > %302 = fcmp oeq float %300, 0.000000e+00 > %303 = fdiv float 1.000000e+00, %299 > %304 = fdiv float 1.000000e+00, %300 > %305 = select i1 %301, float 0x4600000000000000, float %303 > %306 = select i1 %302, float 0x4600000000000000, float %304 > %307 = fadd float %299, -1.000000e+00 > %308 = fadd float %300, -1.000000e+00 > %309 = fadd float %299, 0xBFF19999A0000000 > %310 = fadd float %300, 0xBFF19999A0000000 > %311 = fmul float %283, %307 > %312 = fmul float %286, %308 > %313 = call float @llvm.maxnum.f32(float %311, float 0.000000e+00) > %314 = call float @llvm.maxnum.f32(float %312, float 0.000000e+00) > %315 = call float @llvm.minnum.f32(float %309, float %313) > %316 = call float @llvm.minnum.f32(float %310, float %314) > %317 = call float @llvm.floor.f32(float %315) > %318 = fsub float %315, %317 > %319 = call float @llvm.floor.f32(float %316) > %320 = fsub float %316, %319 > %321 = fsub float %315, %318 > %322 = 
fsub float %316, %320 > %323 = fmul float %285, %307 > %324 = fmul float %288, %308 > %325 = fcmp oeq float %32, 0.000000e+00 > %326 = fcmp oeq float %33, 0.000000e+00 > %327 = fcmp ogt float %323, 0.000000e+00 > %328 = select i1 %327, float 1.000000e+00, float %323 > %329 = fcmp oge float %328, 0.000000e+00 > %330 = fcmp ogt float %324, 0.000000e+00 > %331 = select i1 %330, float 1.000000e+00, float %324 > %332 = fcmp oge float %331, 0.000000e+00 > %.op83 = fmul float %328, 0x4600000000000000 > %333 = select i1 %329, float %.op83, float 0xC600000000000000 > %.op84 = fmul float %331, 0x4600000000000000 > %334 = select i1 %332, float %.op84, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %32 > %336 = fdiv float 1.000000e+00, %33 > %337 = fmul float %323, %335 > %338 = fmul float %324, %336 > %339 = select i1 %325, float %333, float %337 > %340 = select i1 %326, float %334, float %338 > %341 = fcmp oeq float %32, 0.000000e+00 > %342 = fcmp oeq float %33, 0.000000e+00 > %343 = fdiv float 1.000000e+00, %32 > %344 = fdiv float 1.000000e+00, %33 > %345 = fmul float %343, 5.000000e-01 > %346 = fmul float %344, 5.000000e-01 > %347 = select i1 %341, float 0x4600000000000000, float %345 > %348 = select i1 %342, float 0x4600000000000000, float %346 > %349 = fadd float %339, %347 > %350 = fadd float %340, %348 > %351 = fcmp oeq float %299, 0.000000e+00 > %352 = fcmp oeq float %300, 0.000000e+00 > %353 = fcmp ogt float %321, 0.000000e+00 > %354 = select i1 %353, float 1.000000e+00, float %321 > %355 = fcmp oge float %354, 0.000000e+00 > %356 = fcmp ogt float %322, 0.000000e+00 > %357 = select i1 %356, float 1.000000e+00, float %322 > %358 = fcmp oge float %357, 0.000000e+00 > %.op85 = fmul float %354, 0x4600000000000000 > %359 = select i1 %355, float %.op85, float 0xC600000000000000 > %.op86 = fmul float %357, 0x4600000000000000 > %360 = select i1 %358, float %.op86, float 0xC600000000000000 > %361 = fdiv float 1.000000e+00, %299 > %362 = fdiv float 1.000000e+00, %300 > %363 = fmul float %321, %361 > %364 = fmul float %322, %362 > %365 = select i1 %351, float %359, float %363 > %366 = select i1 %352, float %360, float %364 > %367 = fadd float %349, %365 > %368 = fadd float %350, %366 > %369 = fadd float %305, %367 > %370 = fadd float %368, 0.000000e+00 > %371 = fadd float %367, 0.000000e+00 > %372 = fadd float %306, %368 > %373 = fadd float %305, %367 > %374 = fadd float %306, %368 > %375 = bitcast float %367 to i32 > %376 = bitcast float %368 to i32 > %377 = insertelement <2 x i32> undef, i32 %375, i32 0 > %378 = insertelement <2 x i32> %377, i32 %376, i32 1 > %379 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %378, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %380 = extractelement <4 x float> %379, i32 0 > %381 = extractelement <4 x float> %379, i32 1 > %382 = extractelement <4 x float> %379, i32 2 > %383 = bitcast float %369 to i32 > %384 = bitcast float %370 to i32 > %385 = insertelement <2 x i32> undef, i32 %383, i32 0 > %386 = insertelement <2 x i32> %385, i32 %384, i32 1 > %387 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %386, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %388 = extractelement <4 x float> %387, i32 0 > %389 = extractelement <4 x float> %387, i32 1 > %390 = extractelement <4 x float> %387, i32 2 > %391 = bitcast float %371 to i32 > %392 = bitcast float %372 to i32 > %393 = insertelement <2 x i32> undef, i32 %391, i32 0 > %394 = insertelement <2 x i32> %393, 
i32 %392, i32 1 > %395 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %394, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %396 = extractelement <4 x float> %395, i32 0 > %397 = extractelement <4 x float> %395, i32 1 > %398 = extractelement <4 x float> %395, i32 2 > %399 = fsub float %388, %380 > %400 = fsub float %389, %381 > %401 = fsub float %390, %382 > %402 = call float @llvm.fma.f32(float %318, float %399, float %380) > %403 = call float @llvm.fma.f32(float %318, float %400, float %381) > %404 = call float @llvm.fma.f32(float %318, float %401, float %382) > %405 = bitcast float %373 to i32 > %406 = bitcast float %374 to i32 > %407 = insertelement <2 x i32> undef, i32 %405, i32 0 > %408 = insertelement <2 x i32> %407, i32 %406, i32 1 > %409 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %408, <8 x i32> %72, <4 x i32> %79, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %410 = extractelement <4 x float> %409, i32 0 > %411 = extractelement <4 x float> %409, i32 1 > %412 = extractelement <4 x float> %409, i32 2 > %413 = fsub float %410, %396 > %414 = fsub float %411, %397 > %415 = fsub float %412, %398 > %416 = call float @llvm.fma.f32(float %318, float %413, float %396) > %417 = call float @llvm.fma.f32(float %318, float %414, float %397) > %418 = call float @llvm.fma.f32(float %318, float %415, float %398) > %419 = fsub float %416, %402 > %420 = fsub float %417, %403 > %421 = fsub float %418, %404 > %422 = call float @llvm.fma.f32(float %320, float %419, float %402) > %423 = call float @llvm.fma.f32(float %320, float %420, float %403) > %424 = call float @llvm.fma.f32(float %320, float %421, float %404) > %425 = bitcast float %284 to i32 > %426 = bitcast float %287 to i32 > %427 = insertelement <2 x i32> undef, i32 %425, i32 0 > %428 = insertelement <2 x i32> %427, i32 %426, i32 1 > %429 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %428, <8 x i32> %81, <4 x i32> %88, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %430 = extractelement <4 x float> %429, i32 1 > %431 = fmul float %430, %temp14.0 > %432 = fmul float %28, %35 > %433 = fmul float %29, %35 > %434 = fmul float %30, %35 > %435 = call float @llvm.fma.f32(float %254, float %31, float %432) > %436 = call float @llvm.fma.f32(float %257, float %31, float %433) > %437 = call float @llvm.fma.f32(float %260, float %31, float %434) > %438 = bitcast float %435 to i32 > %439 = bitcast float %436 to i32 > %440 = bitcast float %437 to i32 > %441 = insertelement <4 x i32> undef, i32 %438, i32 0 > %442 = insertelement <4 x i32> %441, i32 %439, i32 1 > %443 = insertelement <4 x i32> %442, i32 %440, i32 2 > %444 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %443, <8 x i32> %90, <4 x i32> %97, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %445 = extractelement <4 x float> %444, i32 1 > %446 = fsub float -0.000000e+00, %27 > %447 = call float @llvm.fma.f32(float %446, float %445, float 1.000000e+00) > %448 = fmul float %431, %447 > %449 = fmul float %422, %39 > %450 = fmul float %423, %40 > %451 = fmul float %424, %41 > %452 = fmul float %448, %42 > %453 = fmul float %452, %449 > %454 = fmul float %452, %450 > %455 = fmul float %452, %451 > %456 = fmul float %453, %36 > %457 = fmul float %454, %37 > %458 = fadd float %457, %456 > %459 = fmul float %455, %38 > %460 = fadd float %458, %459 > %461 = fmul float %460, %26 > %462 = bitcast float %5 to i32 > %463 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %462, 10 > %464 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %463, float %453, 11 > %465 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %464, float %454, 12 > %466 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %465, float %455, 13 > %467 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %466, float %461, 14 > %468 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %467, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %468 > >IF67: ; preds = %IF64 > %469 = bitcast float %216 to i32 > %470 = bitcast float %217 to i32 > %471 = insertelement <4 x i32> undef, i32 %469, i32 0 > %472 = insertelement <4 x i32> %471, i32 %470, i32 1 > %473 = insertelement <4 x i32> %472, i32 0, i32 2 > %474 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %473, <8 x i32> %63, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %475 = extractelement <4 x float> %474, i32 0 > %476 = fmul float %temp16.0, %475 > %477 = call float @llvm.minnum.f32(float %476, float %118) > %478 = fsub float %477, %226 > %479 = call float @llvm.maxnum.f32(float %478, float 0.000000e+00) > %480 = fcmp une float %237, 0.000000e+00 > br i1 %480, label %IF70, label %ELSE71 > >IF70: ; preds = %IF67 > %481 = fdiv float 1.000000e+00, %237 > %482 = fmul float %479, %481 > br label %ENDIF69 > >ELSE71: ; preds = %IF67 > %483 = fcmp ogt float %479, 0.000000e+00 > %484 = select i1 %483, float 1.000000e+00, float %479 > %485 = fcmp oge float %484, 0.000000e+00 > %.op87 = fmul float %484, 0x4600000000000000 > %486 = select i1 %485, float %.op87, float 0xC600000000000000 > br label %ENDIF69 > >ENDIF69: ; preds = %ELSE71, %IF70 > %temp16.1 = phi float [ %482, %IF70 ], [ %486, %ELSE71 ] > %487 = fsub float 1.000000e+00, %temp16.1 > %488 = call float @llvm.fabs.f32(float %487) > %489 = call float @llvm.log2.f32(float %488) > %490 = fmul float %489, %25 > %491 = call float @llvm.exp2.f32(float %490) > %492 = fsub float 1.000000e+00, %491 > %493 = call float @llvm.sqrt.f32(float %297) > %494 = fsub float %250, %492 > %495 = call float @llvm.fma.f32(float %493, float %494, float %492) > br label %ENDIF63 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, 
i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.floor.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..57] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 368, 880, 896} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {912, 768, 784, 800} >IMM[3] UINT32 {816, 0, 0, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][23].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][23].zzzz, CONST[1][23].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][55], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][56], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][57], TEMP[0] > 8: MOV TEMP[1].z, TEMP[0].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][48], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][50], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[1].x, CONST[1][51], TEMP[1] > 16: MOV TEMP[0].w, TEMP[1].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 347 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 368) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 372) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 376) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 800) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 804) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 808) > %30 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 812) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 816) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 820) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 824) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 828) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 896) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 900) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 904) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 908) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 912) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 924) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %16 > %54 = fmul float %51, %17 > %55 = call float @llvm.fma.f32(float %52, float %18, float %18) > %56 = fmul float %35, %53 > %57 = fmul float %36, %54 > %58 = fadd float %56, %57 > %59 = fmul float %37, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %38 > %62 = fmul float %39, %53 > %63 = fmul float %40, %54 > %64 = fadd float %62, %63 > %65 = fmul float %41, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %42 > %68 = fmul float %43, %53 > %69 = fmul float %44, %54 > %70 = fadd float %68, %69 > %71 = fmul float %45, %55 > %72 = fadd float %70, %71 > %73 = fadd float %72, %46 > %74 = fmul float %19, %61 > %75 = fmul float %20, %67 > %76 = fadd float %74, %75 > %77 = fmul float %21, %73 > %78 = fadd float %76, %77 > %79 = fadd float %78, %22 > %80 = fmul float %23, %61 > %81 = fmul float %24, %67 > %82 = fadd float %80, %81 > %83 = fmul float %25, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %26 > %86 = fmul float %27, %61 > %87 = fmul float %28, %67 > %88 = fadd float %86, %87 > %89 = fmul float %29, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %30 > %92 = fmul float %31, %61 > %93 = fmul float %32, %67 > %94 = fadd float %92, %93 > %95 = fmul float %33, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %34 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 
>VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..57] >DCL CONST[2][0..24] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 368, 880, 896} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {912, 768, 784, 800} >IMM[3] UINT32 {816, 1, 384, 336} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][23].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][23].zzzz, CONST[1][23].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][55], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][56], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][57], TEMP[0] > 8: MOV TEMP[1].z, TEMP[2].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][48], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][50], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[2].x, CONST[1][51], TEMP[1] > 16: MOV TEMP[0].w, TEMP[2].xxxx > 17: MOV TEMP[3], TEMP[0] > 18: MOV TEMP[4].zw, TEMP[0].wwzw > 19: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 20: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 21: MUL TEMP[5].xy, IMM[1].xyyy, CONST[2][24].xyyy > 22: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy, TEMP[1].xyyy > 23: MOV TEMP[4].xy, TEMP[0].xyxx > 24: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[2][21].zwww, TEMP[2].xyyy > 25: MOV OUT[2], TEMP[0] > 26: MOV OUT[1], TEMP[4] > 27: MOV OUT[0], TEMP[3] > 28: END >radeonsi: Compiling shader 348 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 368) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 372) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 376) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 800) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 804) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 808) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 812) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 816) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 820) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 824) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 828) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %38 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 892) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 896) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 900) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 904) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 908) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 912) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 916) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 920) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 924) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call float @llvm.SI.load.const(<16 x i8> %48, i32 336) > %50 = call float @llvm.SI.load.const(<16 x i8> %48, i32 340) > %51 = call float @llvm.SI.load.const(<16 x i8> %48, i32 344) > %52 = call float @llvm.SI.load.const(<16 x i8> %48, i32 348) > %53 = call float @llvm.SI.load.const(<16 x i8> %48, i32 384) > %54 = call float @llvm.SI.load.const(<16 x i8> %48, i32 388) > %55 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 > %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %13) > %58 = extractelement <4 x float> %57, i32 0 > %59 = extractelement <4 x float> %57, i32 1 > %60 = extractelement <4 x float> %57, i32 2 > %61 = fmul float %58, %16 > %62 = fmul float %59, %17 > %63 = call float @llvm.fma.f32(float %60, float %18, float %18) > %64 = fmul float %35, %61 > %65 = fmul float %36, %62 > %66 = fadd float %64, %65 > %67 = fmul float %37, %63 > %68 = fadd float %66, %67 > %69 = fadd float %68, %38 > %70 = fmul float %39, %61 > %71 = fmul float %40, %62 > %72 = fadd float %70, %71 > %73 = fmul float %41, %63 > %74 = fadd float %72, %73 > %75 = fadd float %74, %42 > %76 = fmul float %43, %61 > %77 = fmul float %44, %62 > %78 = fadd float %76, %77 > %79 = fmul float %45, %63 > %80 = fadd float %78, %79 > %81 = fadd float %80, %46 > %82 = fmul float %19, %69 > %83 = fmul float %20, %75 > %84 = fadd float %82, %83 > %85 = fmul float %21, %81 > %86 = fadd float %84, %85 > %87 = fadd float %86, %22 > %88 = fmul float %23, %69 > %89 = fmul float %24, %75 > %90 = fadd float %88, %89 > %91 = fmul float %25, %81 > %92 = fadd float %90, %91 > %93 = fadd float %92, %26 > %94 = fmul float %27, %69 > %95 = fmul float %28, %75 > %96 = fadd float %94, %95 > %97 = fmul float %29, %81 > %98 = fadd float %96, %97 > %99 = fadd float %98, %30 > %100 = fmul float %31, %69 > %101 = fmul float %32, %75 > %102 = fadd float %100, %101 > %103 = fmul float %33, %81 > %104 = fadd float %102, %103 > %105 = fadd float %104, %34 > %106 = fmul float %105, %53 > %107 = fmul float %105, %54 > %108 = fmul float %105, %49 > %109 = fmul float %105, %50 > %110 = fsub float -0.000000e+00, %54 > %111 = call float @llvm.fma.f32(float %87, float %53, float %106) > %112 = call float @llvm.fma.f32(float %93, float %110, float %107) > %113 = call float @llvm.fma.f32(float %111, float %51, float %108) > %114 = call float @llvm.fma.f32(float %112, float %52, float %109) > %115 = bitcast i32 %11 to float > %116 = insertvalue <{ float, float, float }> undef, float %115, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %99, float %105) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %113, float %114, float %99, float %105) > call void 
@llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %87, float %93, float %99, float %105) > ret <{ float, float, float }> %116 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..72] >DCL TEMP[0..8], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.9950} >IMM[1] UINT32 {0, 96, 112, 16} >IMM[2] UINT32 {1120, 880, 896, 912} >IMM[3] FLT32 { -1.0000, -2.0000, 3.0000, 2.0000} >IMM[4] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} >IMM[5] UINT32 {1136, 1152, 0, 0} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 7: SSG TEMP[2].xy, IN[0].xyyy > 8: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy > 9: RCP TEMP[3].xy, IN[0].wwww > 10: MUL TEMP[3].xy, IN[0].xyyy, TEMP[3].xyyy > 11: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[3].xyyy > 12: MOV TEMP[1].xy, TEMP[1].xyyy > 13: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 14: MOV TEMP[2].z, TEMP[1].xxxx > 15: MUL TEMP[2].xy, TEMP[0].xyyy, TEMP[1].xxxx > 16: MOV TEMP[2].w, IMM[0].zzzz > 17: DP4 TEMP[0].x, CONST[1][6], TEMP[2] > 18: DP4 TEMP[1].x, CONST[1][7], TEMP[2] > 19: MOV TEMP[0].y, TEMP[1].xxxx > 20: ADD TEMP[1].xy, -CONST[1][1].xyyy, CONST[1][1].zwww > 21: FMA TEMP[1].xy, TEMP[0].xyyy, TEMP[1].xyyy, CONST[1][1].xyyy > 22: ADD TEMP[3].xy, -TEMP[1].xxxx, IMM[0].zwww > 23: ADD TEMP[2].x, CONST[1][70].yyyy, IMM[0].zzzz > 24: FMA TEMP[4].x, CONST[1][70].xxxx, TEMP[2].xxxx, TEMP[3].xxxx > 25: ADD TEMP[2].x, TEMP[4].xxxx, IMM[3].xxxx > 26: FSNE TEMP[4].x, CONST[1][70].yyyy, IMM[0].xxxx > 27: UIF TEMP[4].xxxx :0 > 28: RCP TEMP[4].x, CONST[1][70].yyyy > 29: ELSE :0 > 30: MOV TEMP[4].x, IMM[0].yyyy > 31: ENDIF > 32: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[2].xxxx > 33: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 34: FMA TEMP[5].x, TEMP[4].xxxx, IMM[3].yyyy, IMM[3].zzzz > 35: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[4].xxxx > 36: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx > 37: CEIL TEMP[4].x, TEMP[3].yyyy > 38: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 39: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx > 40: MOV TEMP[5].xy, TEMP[1].xyyy > 41: TEX TEMP[5], TEMP[5], SAMP[1], 2D > 42: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[5].wwww > 43: MUL TEMP[6].x, TEMP[2].xxxx, CONST[1][70].zzzz > 44: MOV TEMP[6].w, TEMP[6].xxxx > 45: MUL TEMP[6].xyz, CONST[1][0].wwww, CONST[1][0].xyzz > 46: MOV TEMP[7].xy, TEMP[1].xyyy 
> 47: TEX TEMP[7].yw, TEMP[7], SAMP[2], 2D > 48: MOV TEMP[1].xy, TEMP[1].xyyy > 49: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D > 50: FMA TEMP[7].xy, TEMP[7].ywww, IMM[3].wwww, IMM[3].xxxx > 51: MOV TEMP[2].xy, TEMP[7].xyxx > 52: FMA TEMP[0].x, -TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].zzzz > 53: FMA TEMP[0].x, -TEMP[7].yyyy, TEMP[7].yyyy, TEMP[0].xxxx > 54: SQRT TEMP[7].x, TEMP[0].xxxx > 55: MOV TEMP[2].z, -TEMP[7].xxxx > 56: DP3 TEMP[7].x, CONST[1][55].xyzz, TEMP[2].xyzz > 57: DP3 TEMP[8].x, CONST[1][56].xyzz, TEMP[2].xyzz > 58: MOV TEMP[7].y, TEMP[8].xxxx > 59: DP3 TEMP[8].x, CONST[1][57].xyzz, TEMP[2].xyzz > 60: MOV TEMP[7].z, TEMP[8].xxxx > 61: DP3 TEMP[0].x, TEMP[7].xyzz, TEMP[7].xyzz > 62: RSQ TEMP[8].x, TEMP[0].xxxx > 63: MUL TEMP[2].xyz, TEMP[8].xxxx, TEMP[7].xyzz > 64: FMA TEMP[2].xyz, TEMP[2].xyzz, IMM[4].xxxx, IMM[4].xxxx > 65: ADD TEMP[0].x, CONST[1][71].xxxx, IMM[0].zzzz > 66: FMA TEMP[3].x, CONST[1][70].wwww, TEMP[0].xxxx, TEMP[3].xxxx > 67: ADD TEMP[0].x, TEMP[3].xxxx, IMM[3].xxxx > 68: FSNE TEMP[3].x, CONST[1][71].xxxx, IMM[0].xxxx > 69: UIF TEMP[3].xxxx :0 > 70: RCP TEMP[3].x, CONST[1][71].xxxx > 71: ELSE :0 > 72: MOV TEMP[3].x, IMM[0].yyyy > 73: ENDIF > 74: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[0].xxxx > 75: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 76: FMA TEMP[7].x, TEMP[3].xxxx, IMM[3].yyyy, IMM[3].zzzz > 77: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[3].xxxx > 78: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[7].xxxx > 79: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx > 80: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].wwww > 81: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[1][72].xyzz > 82: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][71].yyyy > 83: MOV TEMP[2].w, TEMP[0].xxxx > 84: MOV TEMP[3].w, TEMP[0].xxxx > 85: MOV TEMP[0].w, TEMP[0].xxxx > 86: MUL TEMP[0].x, TEMP[1].zzzz, CONST[1][72].wwww > 87: MOV TEMP[0].yz, TEMP[1].xyxx > 88: MOV OUT[0], TEMP[6] > 89: MOV OUT[1], TEMP[2] > 90: MOV OUT[2], TEMP[3] > 91: MOV OUT[3], TEMP[0] > 92: END >radeonsi: Compiling shader 349 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float 
@llvm.SI.load.const(<16 x i8> %24, i32 112) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 880) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 884) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 888) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 896) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 900) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 904) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 912) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 916) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 920) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1128) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1132) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1152) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1156) > %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1160) > %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1164) > %60 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 > %62 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %63 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %62, i64 0, i64 3 > %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 > %65 = extractelement <8 x i32> %61, i32 7 > %66 = extractelement <4 x i32> %64, i32 0 > %67 = and i32 %66, %65 > %68 = insertelement <4 x i32> %64, i32 %67, i32 0 > %69 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %70 = load <8 x i32>, <8 x i32> addrspace(2)* %69, align 32, !tbaa !0 > %71 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %72 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %71, i64 0, i64 7 > %73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0 > %74 = extractelement <8 x i32> %70, i32 7 > %75 = extractelement <4 x i32> %73, i32 0 > %76 = and i32 %75, %74 > %77 = insertelement <4 x i32> %73, i32 %76, i32 0 > %78 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !tbaa !0 > %80 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %81 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %80, i64 0, i64 11 > %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 > %83 = extractelement <8 x i32> %79, i32 7 > %84 = extractelement <4 x i32> %82, i32 0 > %85 = and i32 %84, %83 > %86 = insertelement <4 x i32> %82, i32 %85, i32 0 > %87 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 > %89 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %90 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %89, i64 0, i64 15 > %91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0 > %92 = 
extractelement <8 x i32> %88, i32 7 > %93 = extractelement <4 x i32> %91, i32 0 > %94 = and i32 %93, %92 > %95 = insertelement <4 x i32> %91, i32 %94, i32 0 > %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %101 = fcmp oeq float %98, 0.000000e+00 > %102 = fcmp oeq float %98, 0.000000e+00 > %103 = fcmp ogt float %99, 0.000000e+00 > %104 = select i1 %103, float 1.000000e+00, float %99 > %105 = fcmp oge float %104, 0.000000e+00 > %106 = fcmp ogt float %100, 0.000000e+00 > %107 = select i1 %106, float 1.000000e+00, float %100 > %108 = fcmp oge float %107, 0.000000e+00 > %.op = fmul float %104, 0x4600000000000000 > %109 = select i1 %105, float %.op, float 0xC600000000000000 > %.op39 = fmul float %107, 0x4600000000000000 > %110 = select i1 %108, float %.op39, float 0xC600000000000000 > %111 = fdiv float 1.000000e+00, %98 > %112 = fmul float %99, %111 > %113 = fmul float %100, %111 > %114 = select i1 %101, float %109, float %112 > %115 = select i1 %102, float %110, float %113 > %116 = fcmp oeq float %98, 0.000000e+00 > %117 = fcmp oeq float %98, 0.000000e+00 > %118 = fcmp ogt float %96, 0.000000e+00 > %119 = select i1 %118, float 1.000000e+00, float %96 > %120 = fcmp oge float %119, 0.000000e+00 > %121 = fcmp ogt float %97, 0.000000e+00 > %122 = select i1 %121, float 1.000000e+00, float %97 > %123 = fcmp oge float %122, 0.000000e+00 > %.op40 = fmul float %119, 0x4600000000000000 > %124 = select i1 %120, float %.op40, float 0xC600000000000000 > %.op41 = fmul float %122, 0x4600000000000000 > %125 = select i1 %123, float %.op41, float 0xC600000000000000 > %126 = fdiv float 1.000000e+00, %98 > %127 = fmul float %96, %126 > %128 = fmul float %97, %126 > %129 = select i1 %116, float %124, float %127 > %130 = select i1 %117, float %125, float %128 > %131 = bitcast float %129 to i32 > %132 = bitcast float %130 to i32 > %133 = insertelement <2 x i32> undef, i32 %131, i32 0 > %134 = insertelement <2 x i32> %133, i32 %132, i32 1 > %135 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %134, <8 x i32> %61, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %136 = extractelement <4 x float> %135, i32 0 > %137 = fmul float %114, %136 > %138 = fmul float %115, %136 > %139 = fmul float %33, %137 > %140 = fmul float %34, %138 > %141 = fadd float %139, %140 > %142 = fmul float %35, %136 > %143 = fadd float %141, %142 > %144 = fadd float %143, %36 > %145 = fmul float %37, %137 > %146 = fmul float %38, %138 > %147 = fadd float %145, %146 > %148 = fmul float %39, %136 > %149 = fadd float %147, %148 > %150 = fadd float %149, %40 > %151 = fsub float %31, %29 > %152 = fsub float %32, %30 > %153 = call float @llvm.fma.f32(float %144, float %151, float %29) > %154 = call float @llvm.fma.f32(float %150, float %152, float %30) > %155 = fsub float 1.000000e+00, %153 > %156 = fsub float 0x3FEFD70A40000000, %153 > %157 = fadd float %51, 1.000000e+00 > %158 = call float @llvm.fma.f32(float %50, float %157, float %155) > %159 = fadd float %158, -1.000000e+00 > %160 = fcmp une float %51, 0.000000e+00 > %161 = fdiv float 1.000000e+00, %51 > %temp16.0 = select i1 %160, float %161, float 0x4600000000000000 > %162 = fmul float %temp16.0, %159 > %163 = call float @llvm.AMDGPU.clamp.(float 
%162, float 0.000000e+00, float 1.000000e+00) > %164 = call float @llvm.fma.f32(float %163, float -2.000000e+00, float 3.000000e+00) > %165 = fmul float %163, %163 > %166 = fmul float %165, %164 > %167 = call float @llvm.ceil.f32(float %156) > %168 = call float @llvm.AMDGPU.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) > %169 = fmul float %166, %168 > %170 = bitcast float %153 to i32 > %171 = bitcast float %154 to i32 > %172 = insertelement <2 x i32> undef, i32 %170, i32 0 > %173 = insertelement <2 x i32> %172, i32 %171, i32 1 > %174 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %173, <8 x i32> %70, <4 x i32> %77, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %175 = extractelement <4 x float> %174, i32 0 > %176 = extractelement <4 x float> %174, i32 1 > %177 = extractelement <4 x float> %174, i32 2 > %178 = extractelement <4 x float> %174, i32 3 > %179 = fmul float %169, %178 > %180 = fmul float %179, %52 > %181 = fmul float %28, %25 > %182 = fmul float %28, %26 > %183 = fmul float %28, %27 > %184 = bitcast float %153 to i32 > %185 = bitcast float %154 to i32 > %186 = insertelement <2 x i32> undef, i32 %184, i32 0 > %187 = insertelement <2 x i32> %186, i32 %185, i32 1 > %188 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %187, <8 x i32> %79, <4 x i32> %86, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %189 = extractelement <4 x float> %188, i32 1 > %190 = extractelement <4 x float> %188, i32 3 > %191 = bitcast float %153 to i32 > %192 = bitcast float %154 to i32 > %193 = insertelement <2 x i32> undef, i32 %191, i32 0 > %194 = insertelement <2 x i32> %193, i32 %192, i32 1 > %195 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %194, <8 x i32> %88, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %196 = extractelement <4 x float> %195, i32 0 > %197 = extractelement <4 x float> %195, i32 1 > %198 = extractelement <4 x float> %195, i32 2 > %199 = call float @llvm.fma.f32(float %189, float 2.000000e+00, float -1.000000e+00) > %200 = call float @llvm.fma.f32(float %190, float 2.000000e+00, float -1.000000e+00) > %201 = fsub float -0.000000e+00, %199 > %202 = call float @llvm.fma.f32(float %201, float %199, float 1.000000e+00) > %203 = fsub float -0.000000e+00, %200 > %204 = call float @llvm.fma.f32(float %203, float %200, float %202) > %205 = call float @llvm.sqrt.f32(float %204) > %206 = fsub float -0.000000e+00, %205 > %207 = fmul float %41, %199 > %208 = fmul float %42, %200 > %209 = fadd float %208, %207 > %210 = fmul float %43, %206 > %211 = fadd float %209, %210 > %212 = fmul float %44, %199 > %213 = fmul float %45, %200 > %214 = fadd float %213, %212 > %215 = fmul float %46, %206 > %216 = fadd float %214, %215 > %217 = fmul float %47, %199 > %218 = fmul float %48, %200 > %219 = fadd float %218, %217 > %220 = fmul float %49, %206 > %221 = fadd float %219, %220 > %222 = fmul float %211, %211 > %223 = fmul float %216, %216 > %224 = fadd float %223, %222 > %225 = fmul float %221, %221 > %226 = fadd float %224, %225 > %227 = call float @llvm.AMDGPU.rsq.clamped.f32(float %226) > %228 = fmul float %227, %211 > %229 = fmul float %227, %216 > %230 = fmul float %227, %221 > %231 = call float @llvm.fma.f32(float %228, float 5.000000e-01, float 5.000000e-01) > %232 = call float @llvm.fma.f32(float %229, float 5.000000e-01, float 5.000000e-01) > %233 = call float @llvm.fma.f32(float %230, float 5.000000e-01, float 5.000000e-01) > %234 = fadd float %54, 1.000000e+00 > %235 = call float @llvm.fma.f32(float %53, 
float %234, float %155) > %236 = fadd float %235, -1.000000e+00 > %237 = fcmp une float %54, 0.000000e+00 > %238 = fdiv float 1.000000e+00, %54 > %temp12.0 = select i1 %237, float %238, float 0x4600000000000000 > %239 = fmul float %temp12.0, %236 > %240 = call float @llvm.AMDGPU.clamp.(float %239, float 0.000000e+00, float 1.000000e+00) > %241 = call float @llvm.fma.f32(float %240, float -2.000000e+00, float 3.000000e+00) > %242 = fmul float %240, %240 > %243 = fmul float %242, %241 > %244 = fmul float %243, %168 > %245 = fmul float %244, %178 > %246 = fmul float %175, %56 > %247 = fmul float %176, %57 > %248 = fmul float %177, %58 > %249 = fmul float %245, %55 > %250 = fmul float %198, %59 > %251 = bitcast float %5 to i32 > %252 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %251, 10 > %253 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %252, float %181, 11 > %254 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %253, float %182, 12 > %255 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %254, float %183, 13 > %256 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %255, float %180, 14 > %257 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %256, float %231, 15 > %258 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %257, float %232, 16 > %259 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %258, float %233, 17 > %260 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %259, float %249, 18 > %261 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %260, float %246, 19 > %262 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %261, float %247, 20 > %263 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %262, float %248, 21 > %264 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, 
float, float, float, float }> %263, float %249, 22 > %265 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %264, float %250, 23 > %266 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %265, float %197, 24 > %267 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %266, float %196, 25 > %268 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %267, float %249, 26 > %269 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %268, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %269 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..56] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 352, 864, 880} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {896, 752, 768, 784} >IMM[3] UINT32 {800, 0, 0, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][22].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][22].zzzz, CONST[1][22].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][54], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][55], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][56], TEMP[0] > 8: MOV TEMP[1].z, TEMP[0].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][47], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[1].x, CONST[1][50], TEMP[1] > 16: MOV TEMP[0].w, TEMP[1].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 350 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, 
float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 352) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 356) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 360) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 800) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 804) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 808) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 812) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 896) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 900) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 904) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 908) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %16 > %54 = fmul float %51, %17 > %55 = call float @llvm.fma.f32(float %52, float %18, float %18) > %56 = fmul float %35, %53 > %57 = fmul float %36, %54 > %58 = fadd float %56, %57 > %59 = fmul float %37, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %38 > %62 = fmul float %39, %53 > %63 = fmul float %40, %54 > %64 = fadd float %62, %63 > %65 = fmul float %41, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %42 > %68 = fmul float %43, %53 > %69 = fmul float %44, %54 > %70 = fadd float %68, %69 > %71 = fmul float %45, %55 > %72 = fadd float %70, %71 > %73 = fadd float %72, %46 > %74 = fmul float %19, %61 > %75 = fmul float %20, %67 > %76 = fadd float %74, %75 > %77 = fmul float %21, %73 > 
%78 = fadd float %76, %77 > %79 = fadd float %78, %22 > %80 = fmul float %23, %61 > %81 = fmul float %24, %67 > %82 = fadd float %80, %81 > %83 = fmul float %25, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %26 > %86 = fmul float %27, %61 > %87 = fmul float %28, %67 > %88 = fadd float %86, %87 > %89 = fmul float %29, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %30 > %92 = fmul float %31, %61 > %93 = fmul float %32, %67 > %94 = fadd float %92, %93 > %95 = fmul float %33, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %34 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..56] >DCL CONST[2][0..24] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 352, 864, 880} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {896, 752, 768, 784} >IMM[3] UINT32 {800, 1, 384, 336} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][22].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][22].zzzz, CONST[1][22].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][54], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][55], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][56], TEMP[0] > 8: MOV TEMP[1].z, TEMP[2].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][47], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][48], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][49], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[2].x, CONST[1][50], TEMP[1] > 16: MOV TEMP[0].w, TEMP[2].xxxx > 17: MOV TEMP[3], TEMP[0] > 18: MOV TEMP[4].zw, TEMP[0].wwzw > 19: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[2][24].xyyy > 20: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[2][21].xyyy > 21: MUL TEMP[5].xy, IMM[1].xyyy, CONST[2][24].xyyy > 22: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy, TEMP[1].xyyy > 23: MOV TEMP[4].xy, TEMP[0].xyxx > 24: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[2][21].zwww, TEMP[2].xyyy > 25: MOV OUT[2], TEMP[0] > 26: MOV OUT[1], TEMP[4] > 27: MOV OUT[0], TEMP[3] > 28: END >radeonsi: Compiling shader 351 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 352) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 356) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 360) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 752) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 756) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 760) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 764) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 768) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 772) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 776) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 780) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 784) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 788) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 792) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 796) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 800) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 804) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 808) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 812) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 864) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 868) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 872) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 876) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 880) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 884) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 888) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 892) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 896) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 900) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 904) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 908) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call float @llvm.SI.load.const(<16 x i8> %48, i32 336) > %50 = call float @llvm.SI.load.const(<16 x i8> %48, i32 340) > %51 = call float @llvm.SI.load.const(<16 x i8> %48, i32 344) > %52 = call float @llvm.SI.load.const(<16 x i8> %48, i32 348) > %53 = call float @llvm.SI.load.const(<16 x i8> %48, i32 384) > %54 = call float @llvm.SI.load.const(<16 x i8> %48, i32 388) > %55 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 > %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %13) > %58 = extractelement <4 x float> %57, i32 0 > %59 = extractelement <4 x float> %57, i32 1 > %60 = extractelement <4 x float> %57, i32 2 > %61 = fmul float %58, %16 > %62 = fmul float %59, %17 > %63 = call float @llvm.fma.f32(float %60, float %18, float %18) > %64 = fmul float %35, %61 > %65 = fmul float %36, %62 > %66 = fadd float %64, %65 > %67 = fmul float %37, %63 > %68 = fadd float %66, %67 > %69 = fadd float %68, %38 > %70 = fmul float %39, %61 > %71 = fmul float %40, %62 > %72 = fadd float %70, %71 > %73 = fmul float %41, %63 > %74 = fadd float %72, %73 > %75 = fadd float %74, %42 > %76 = fmul float %43, %61 > %77 = fmul float %44, %62 > %78 = fadd float %76, %77 > %79 = fmul float %45, %63 > %80 = fadd float %78, %79 > %81 = fadd float %80, %46 > %82 = fmul float %19, %69 > %83 = fmul float %20, %75 > %84 = fadd float %82, 
%83 > %85 = fmul float %21, %81 > %86 = fadd float %84, %85 > %87 = fadd float %86, %22 > %88 = fmul float %23, %69 > %89 = fmul float %24, %75 > %90 = fadd float %88, %89 > %91 = fmul float %25, %81 > %92 = fadd float %90, %91 > %93 = fadd float %92, %26 > %94 = fmul float %27, %69 > %95 = fmul float %28, %75 > %96 = fadd float %94, %95 > %97 = fmul float %29, %81 > %98 = fadd float %96, %97 > %99 = fadd float %98, %30 > %100 = fmul float %31, %69 > %101 = fmul float %32, %75 > %102 = fadd float %100, %101 > %103 = fmul float %33, %81 > %104 = fadd float %102, %103 > %105 = fadd float %104, %34 > %106 = fmul float %105, %53 > %107 = fmul float %105, %54 > %108 = fmul float %105, %49 > %109 = fmul float %105, %50 > %110 = fsub float -0.000000e+00, %54 > %111 = call float @llvm.fma.f32(float %87, float %53, float %106) > %112 = call float @llvm.fma.f32(float %93, float %110, float %107) > %113 = call float @llvm.fma.f32(float %111, float %51, float %108) > %114 = call float @llvm.fma.f32(float %112, float %52, float %109) > %115 = bitcast i32 %11 to float > %116 = insertvalue <{ float, float, float }> undef, float %115, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %99, float %105) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %113, float %114, float %99, float %105) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %87, float %93, float %99, float %105) > ret <{ float, float, float }> %116 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..72] >DCL TEMP[0..6], LOCAL >IMM[0] UINT32 {0, 1104, 80, 96} >IMM[1] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[2] FLT32 { -1.0000, -2.0000, 3.0000, -0.0050} >IMM[3] UINT32 {864, 880, 896, 1120} >IMM[4] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} >IMM[5] UINT32 {1136, 1152, 0, 0} > 0: FSNE TEMP[0].x, CONST[1][69].yyyy, IMM[1].xxxx > 1: UIF TEMP[0].xxxx :0 > 2: RCP TEMP[0].x, CONST[1][69].yyyy > 3: ELSE :0 > 4: MOV TEMP[0].x, IMM[1].yyyy > 5: ENDIF > 6: ADD TEMP[1].x, CONST[1][69].yyyy, IMM[1].zzzz > 7: FSEQ TEMP[2].xy, IN[0].wwww, IMM[1].xxxx > 8: SSG TEMP[3].xy, IN[0].xyyy > 9: MUL TEMP[3].xy, IMM[1].yyyy, TEMP[3].xyyy > 10: RCP TEMP[4].xy, IN[0].wwww > 11: MUL TEMP[4].xy, IN[0].xyyy, TEMP[4].xyyy > 12: UCMP TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy, TEMP[4].xyyy > 13: MOV TEMP[3].xy, TEMP[2].xyyy > 14: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D > 15: MOV TEMP[2].z, TEMP[3].xxxx > 16: FSEQ TEMP[4].xy, IN[0].wwww, IMM[1].xxxx > 17: SSG TEMP[5].xy, IN[1].xyyy > 18: 
MUL TEMP[5].xy, IMM[1].yyyy, TEMP[5].xyyy > 19: RCP TEMP[6].xy, IN[0].wwww > 20: MUL TEMP[6].xy, IN[1].xyyy, TEMP[6].xyyy > 21: UCMP TEMP[4].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 22: MUL TEMP[2].xy, TEMP[4].xyyy, TEMP[3].xxxx > 23: MOV TEMP[2].w, IMM[1].zzzz > 24: DP4 TEMP[3].x, CONST[1][5], TEMP[2] > 25: DP4 TEMP[2].x, CONST[1][6], TEMP[2] > 26: MOV TEMP[3].y, TEMP[2].xxxx > 27: MOV TEMP[2].xy, TEMP[3].xyyy > 28: TEX TEMP[2].xzw, TEMP[2], SAMP[1], 2D > 29: MOV TEMP[4].xy, TEMP[3].xyyy > 30: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D > 31: FMA TEMP[4].xy, TEMP[4].ywww, IMM[1].wwww, IMM[2].xxxx > 32: MOV TEMP[3].xy, TEMP[4].xyxx > 33: FMA TEMP[1].x, CONST[1][69].xxxx, TEMP[1].xxxx, TEMP[2].wwww > 34: ADD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 35: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx > 36: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 37: FMA TEMP[5].x, TEMP[1].xxxx, IMM[2].yyyy, IMM[2].zzzz > 38: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx > 39: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx > 40: ADD TEMP[1].x, TEMP[2].wwww, IMM[2].wwww > 41: CEIL TEMP[1].x, TEMP[1].xxxx > 42: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 43: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx > 44: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].zzzz > 45: MUL TEMP[5].x, TEMP[0].xxxx, CONST[1][69].zzzz > 46: MOV TEMP[5].w, TEMP[5].xxxx > 47: MUL TEMP[5].xyz, CONST[1][0].wwww, CONST[1][0].xyzz > 48: FMA TEMP[0].x, -TEMP[4].xxxx, TEMP[4].xxxx, IMM[1].zzzz > 49: FMA TEMP[0].x, -TEMP[4].yyyy, TEMP[4].yyyy, TEMP[0].xxxx > 50: SQRT TEMP[4].x, TEMP[0].xxxx > 51: MOV TEMP[3].z, -TEMP[4].xxxx > 52: DP3 TEMP[4].x, CONST[1][54].xyzz, TEMP[3].xyzz > 53: DP3 TEMP[6].x, CONST[1][55].xyzz, TEMP[3].xyzz > 54: MOV TEMP[4].y, TEMP[6].xxxx > 55: DP3 TEMP[3].x, CONST[1][56].xyzz, TEMP[3].xyzz > 56: MOV TEMP[4].z, TEMP[3].xxxx > 57: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[4].xyzz > 58: RSQ TEMP[3].x, TEMP[0].xxxx > 59: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[4].xyzz > 60: FMA TEMP[3].xyz, TEMP[3].xyzz, IMM[4].xxxx, IMM[4].xxxx > 61: ADD TEMP[0].x, CONST[1][70].xxxx, IMM[1].zzzz > 62: FMA TEMP[4].x, CONST[1][69].wwww, TEMP[0].xxxx, TEMP[2].wwww > 63: ADD TEMP[0].x, TEMP[4].xxxx, IMM[2].xxxx > 64: FSNE TEMP[4].x, CONST[1][70].xxxx, IMM[1].xxxx > 65: UIF TEMP[4].xxxx :0 > 66: RCP TEMP[4].x, CONST[1][70].xxxx > 67: ELSE :0 > 68: MOV TEMP[4].x, IMM[1].yyyy > 69: ENDIF > 70: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx > 71: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 72: FMA TEMP[6].x, TEMP[4].xxxx, IMM[2].yyyy, IMM[2].zzzz > 73: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[4].xxxx > 74: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[6].xxxx > 75: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx > 76: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx > 77: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][70].yyyy > 78: MOV TEMP[3].w, TEMP[0].xxxx > 79: MOV TEMP[1].w, TEMP[0].xxxx > 80: MOV TEMP[0].w, TEMP[0].xxxx > 81: MOV TEMP[1].xyz, CONST[1][71].xyzx > 82: MOV TEMP[0].x, CONST[1][71].wwww > 83: MOV TEMP[0].yz, CONST[1][72].yxyy > 84: MOV OUT[0], TEMP[5] > 85: MOV OUT[1], TEMP[3] > 86: MOV OUT[2], TEMP[1] > 87: MOV OUT[3], TEMP[0] > 88: END >radeonsi: Compiling shader 352 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] 
addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 84) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 88) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 92) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 864) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 868) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 872) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 880) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 884) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 888) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 896) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 900) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 904) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1104) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1108) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1112) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1116) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) > %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) > %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) > %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1144) > %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1148) > %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1152) > %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1156) > %58 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 > %60 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %61 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %60, i64 0, i64 3 > %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 > %63 = extractelement <8 x i32> %59, i32 7 > %64 = extractelement <4 x i32> %62, i32 0 > %65 = and i32 %64, %63 > %66 = insertelement <4 x i32> %62, i32 %65, i32 0 > %67 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 > %69 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %70 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %69, i64 0, i64 7 > %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0 > %72 = extractelement <8 x i32> %68, i32 7 > %73 = extractelement <4 x i32> %71, i32 0 > %74 = and i32 %73, %72 > %75 = insertelement <4 x i32> %71, 
i32 %74, i32 0 > %76 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 > %78 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %79 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %78, i64 0, i64 11 > %80 = load <4 x i32>, <4 x i32> addrspace(2)* %79, align 16, !tbaa !0 > %81 = extractelement <8 x i32> %77, i32 7 > %82 = extractelement <4 x i32> %80, i32 0 > %83 = and i32 %82, %81 > %84 = insertelement <4 x i32> %80, i32 %83, i32 0 > %85 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %90 = fcmp une float %47, 0.000000e+00 > %91 = fdiv float 1.000000e+00, %47 > %temp.0 = select i1 %90, float %91, float 0x4600000000000000 > %92 = fadd float %47, 1.000000e+00 > %93 = fcmp oeq float %87, 0.000000e+00 > %94 = fcmp oeq float %87, 0.000000e+00 > %95 = fcmp ogt float %85, 0.000000e+00 > %96 = select i1 %95, float 1.000000e+00, float %85 > %97 = fcmp oge float %96, 0.000000e+00 > %98 = fcmp ogt float %86, 0.000000e+00 > %99 = select i1 %98, float 1.000000e+00, float %86 > %100 = fcmp oge float %99, 0.000000e+00 > %.op = fmul float %96, 0x4600000000000000 > %101 = select i1 %97, float %.op, float 0xC600000000000000 > %.op31 = fmul float %99, 0x4600000000000000 > %102 = select i1 %100, float %.op31, float 0xC600000000000000 > %103 = fdiv float 1.000000e+00, %87 > %104 = fmul float %85, %103 > %105 = fmul float %86, %103 > %106 = select i1 %93, float %101, float %104 > %107 = select i1 %94, float %102, float %105 > %108 = bitcast float %106 to i32 > %109 = bitcast float %107 to i32 > %110 = insertelement <2 x i32> undef, i32 %108, i32 0 > %111 = insertelement <2 x i32> %110, i32 %109, i32 1 > %112 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %111, <8 x i32> %59, <4 x i32> %66, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %113 = extractelement <4 x float> %112, i32 0 > %114 = fcmp oeq float %87, 0.000000e+00 > %115 = fcmp oeq float %87, 0.000000e+00 > %116 = fcmp ogt float %88, 0.000000e+00 > %117 = select i1 %116, float 1.000000e+00, float %88 > %118 = fcmp oge float %117, 0.000000e+00 > %119 = fcmp ogt float %89, 0.000000e+00 > %120 = select i1 %119, float 1.000000e+00, float %89 > %121 = fcmp oge float %120, 0.000000e+00 > %.op32 = fmul float %117, 0x4600000000000000 > %122 = select i1 %118, float %.op32, float 0xC600000000000000 > %.op33 = fmul float %120, 0x4600000000000000 > %123 = select i1 %121, float %.op33, float 0xC600000000000000 > %124 = fdiv float 1.000000e+00, %87 > %125 = fmul float %88, %124 > %126 = fmul float %89, %124 > %127 = select i1 %114, float %122, float %125 > %128 = select i1 %115, float %123, float %126 > %129 = fmul float %127, %113 > %130 = fmul float %128, %113 > %131 = fmul float %29, %129 > %132 = fmul float %30, %130 > %133 = fadd float %131, %132 > %134 = fmul float %31, %113 > %135 = fadd float %133, %134 > %136 = fadd float %135, %32 > %137 = fmul float %33, %129 > %138 = fmul float %34, %130 > %139 = fadd float %137, %138 > %140 = fmul float %35, %113 > %141 = fadd float %139, %140 > %142 = fadd float %141, %36 > %143 = bitcast float %136 to i32 > %144 = bitcast float %142 to i32 > 
%145 = insertelement <2 x i32> undef, i32 %143, i32 0 > %146 = insertelement <2 x i32> %145, i32 %144, i32 1 > %147 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %146, <8 x i32> %68, <4 x i32> %75, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %148 = extractelement <4 x float> %147, i32 0 > %149 = extractelement <4 x float> %147, i32 2 > %150 = extractelement <4 x float> %147, i32 3 > %151 = bitcast float %136 to i32 > %152 = bitcast float %142 to i32 > %153 = insertelement <2 x i32> undef, i32 %151, i32 0 > %154 = insertelement <2 x i32> %153, i32 %152, i32 1 > %155 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %154, <8 x i32> %77, <4 x i32> %84, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %156 = extractelement <4 x float> %155, i32 1 > %157 = extractelement <4 x float> %155, i32 3 > %158 = call float @llvm.fma.f32(float %156, float 2.000000e+00, float -1.000000e+00) > %159 = call float @llvm.fma.f32(float %157, float 2.000000e+00, float -1.000000e+00) > %160 = call float @llvm.fma.f32(float %46, float %92, float %150) > %161 = fadd float %160, -1.000000e+00 > %162 = fmul float %temp.0, %161 > %163 = call float @llvm.AMDGPU.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) > %164 = call float @llvm.fma.f32(float %163, float -2.000000e+00, float 3.000000e+00) > %165 = fmul float %163, %163 > %166 = fmul float %165, %164 > %167 = fadd float %150, 0xBF747AE140000000 > %168 = call float @llvm.ceil.f32(float %167) > %169 = call float @llvm.AMDGPU.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) > %170 = fmul float %166, %169 > %171 = fmul float %170, %149 > %172 = fmul float %171, %48 > %173 = fmul float %28, %25 > %174 = fmul float %28, %26 > %175 = fmul float %28, %27 > %176 = fsub float -0.000000e+00, %158 > %177 = call float @llvm.fma.f32(float %176, float %158, float 1.000000e+00) > %178 = fsub float -0.000000e+00, %159 > %179 = call float @llvm.fma.f32(float %178, float %159, float %177) > %180 = call float @llvm.sqrt.f32(float %179) > %181 = fsub float -0.000000e+00, %180 > %182 = fmul float %37, %158 > %183 = fmul float %38, %159 > %184 = fadd float %183, %182 > %185 = fmul float %39, %181 > %186 = fadd float %184, %185 > %187 = fmul float %40, %158 > %188 = fmul float %41, %159 > %189 = fadd float %188, %187 > %190 = fmul float %42, %181 > %191 = fadd float %189, %190 > %192 = fmul float %43, %158 > %193 = fmul float %44, %159 > %194 = fadd float %193, %192 > %195 = fmul float %45, %181 > %196 = fadd float %194, %195 > %197 = fmul float %186, %186 > %198 = fmul float %191, %191 > %199 = fadd float %198, %197 > %200 = fmul float %196, %196 > %201 = fadd float %199, %200 > %202 = call float @llvm.AMDGPU.rsq.clamped.f32(float %201) > %203 = fmul float %202, %186 > %204 = fmul float %202, %191 > %205 = fmul float %202, %196 > %206 = call float @llvm.fma.f32(float %203, float 5.000000e-01, float 5.000000e-01) > %207 = call float @llvm.fma.f32(float %204, float 5.000000e-01, float 5.000000e-01) > %208 = call float @llvm.fma.f32(float %205, float 5.000000e-01, float 5.000000e-01) > %209 = fadd float %50, 1.000000e+00 > %210 = call float @llvm.fma.f32(float %49, float %209, float %150) > %211 = fadd float %210, -1.000000e+00 > %212 = fcmp une float %50, 0.000000e+00 > %213 = fdiv float 1.000000e+00, %50 > %temp16.0 = select i1 %212, float %213, float 0x4600000000000000 > %214 = fmul float %temp16.0, %211 > %215 = call float @llvm.AMDGPU.clamp.(float %214, float 0.000000e+00, float 1.000000e+00) > %216 = call float 
@llvm.fma.f32(float %215, float -2.000000e+00, float 3.000000e+00) > %217 = fmul float %215, %215 > %218 = fmul float %217, %216 > %219 = fmul float %218, %169 > %220 = fmul float %219, %148 > %221 = fmul float %220, %51 > %222 = bitcast float %5 to i32 > %223 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %222, 10 > %224 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %223, float %173, 11 > %225 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %224, float %174, 12 > %226 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %225, float %175, 13 > %227 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %226, float %172, 14 > %228 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %227, float %206, 15 > %229 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %228, float %207, 16 > %230 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %229, float %208, 17 > %231 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %230, float %221, 18 > %232 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %231, float %52, 19 > %233 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %232, float %53, 20 > %234 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %233, float %54, 21 > %235 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %234, float %221, 22 > %236 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %235, float %55, 23 > %237 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %236, float 
%56, 24 > %238 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %237, float %57, 25 > %239 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %238, float %221, 26 > %240 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %239, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %240 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.ceil.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..37] >DCL TEMP[0..12], LOCAL >IMM[0] FLT32 { 1.0000, -0.1500, 0.0597, -1.5000} >IMM[1] UINT32 {0, 320, 336, 352} >IMM[2] UINT32 {368, 400, 496, 576} >IMM[3] UINT32 {544, 560, 384, 416} >IMM[4] UINT32 {528, 448, 432, 512} >IMM[5] FLT32 { 0.0000, 0.0000, 158456325028528675187087900672.0000, 1.4427} >IMM[6] UINT32 {464, 480, 0, 0} >IMM[7] FLT32 { 0.5000, 0.4545, -0.0040, 6.2000} >IMM[8] FLT32 { 1.7000, 0.0600, 0.0000, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][23], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][25], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][31].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: ADD TEMP[5].xyz, -IN[0].xyzz, CONST[1][36].xyzz > 14: MUL TEMP[6].xyz, CONST[1][34].xyzz, CONST[1][35].xyzz > 15: MOV TEMP[6].w, CONST[1][34].wwww > 16: DP3 TEMP[1].x, CONST[1][24].xyzz, TEMP[5].xyzz > 17: DP3 TEMP[7].x, CONST[1][26].xyzz, TEMP[5].xyzz > 18: MOV TEMP[1].z, TEMP[7].xxxx > 19: DP3 TEMP[5].x, CONST[1][25].xyzz, TEMP[5].xyzz > 20: MOV TEMP[1].y, TEMP[5].xxxx > 21: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz > 22: RSQ TEMP[7].x, TEMP[7].xxxx > 23: MUL TEMP[8].xyz, TEMP[7].xxxx, TEMP[1].xyzz > 
24: FMA TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[0].yyyy > 25: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx > 27: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx > 28: DP3 TEMP[7].x, -TEMP[8].xyzz, CONST[1][33].xyzz > 29: FMA TEMP[9].x, -CONST[1][28].yyyy, TEMP[7].xxxx, CONST[1][28].xxxx > 30: FMA TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx, IMM[0].xxxx > 31: MOV TEMP[0].z, TEMP[7].xxxx > 32: ABS TEMP[7].x, TEMP[9].xxxx > 33: LG2 TEMP[7].x, TEMP[7].xxxx > 34: MOV TEMP[0].w, TEMP[7].xxxx > 35: MUL TEMP[7].xy, TEMP[0].zwww, IMM[0].zwww > 36: EX2 TEMP[9].x, TEMP[7].yyyy > 37: FMA TEMP[1].x, CONST[1][28].zzzz, TEMP[9].xxxx, -CONST[1][27].zzzz > 38: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][28].zzzz > 39: MAX TEMP[10].x, TEMP[1].xxxx, IMM[5].xxxx > 40: ABS TEMP[11].x, TEMP[2].xxxx > 41: MUL TEMP[11].x, TEMP[11].xxxx, IMM[5].yyyy > 42: MIN TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx > 43: ADD TEMP[11].x, -TEMP[11].xxxx, IMM[0].xxxx > 44: FMA TEMP[9].x, -TEMP[10].xxxx, TEMP[11].xxxx, TEMP[9].xxxx > 45: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][32].wwww > 46: FSNE TEMP[10].x, CONST[1][27].xxxx, IMM[5].xxxx > 47: UIF TEMP[10].xxxx :0 > 48: RCP TEMP[10].x, CONST[1][27].xxxx > 49: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 50: ELSE :0 > 51: SSG TEMP[11].x, -TEMP[0].xxxx > 52: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 53: ENDIF > 54: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 55: EX2 TEMP[10].x, TEMP[1].xxxx > 56: ADD TEMP[1].x, TEMP[10].xxxx, CONST[1][28].wwww > 57: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][29].yyyy > 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].xxxx > 59: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx > 60: MIN TEMP[5].x, TEMP[5].xxxx, CONST[1][27].wwww > 61: MAX TEMP[5].x, TEMP[5].xxxx, CONST[1][29].xxxx > 62: MUL TEMP[9].x, TEMP[5].xxxx, TEMP[9].xxxx > 63: FSNE TEMP[10].x, CONST[1][30].wwww, IMM[5].xxxx > 64: UIF TEMP[10].xxxx :0 > 65: RCP TEMP[10].x, CONST[1][30].wwww > 66: MUL TEMP[10].x, -TEMP[0].xxxx, TEMP[10].xxxx > 67: ELSE :0 > 68: SSG TEMP[11].x, -TEMP[0].xxxx > 69: MUL TEMP[10].x, IMM[5].zzzz, TEMP[11].xxxx > 70: ENDIF > 71: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][31].zzzz > 72: FSNE TEMP[11].x, CONST[1][27].yyyy, IMM[5].xxxx > 73: UIF TEMP[11].xxxx :0 > 74: RCP TEMP[11].x, CONST[1][27].yyyy > 75: MUL TEMP[11].x, TEMP[0].xxxx, TEMP[11].xxxx > 76: ELSE :0 > 77: SSG TEMP[12].x, TEMP[0].xxxx > 78: MUL TEMP[11].x, IMM[5].zzzz, TEMP[12].xxxx > 79: ENDIF > 80: MUL TEMP[1].x, TEMP[10].xxxx, IMM[5].wwww > 81: EX2 TEMP[10].x, TEMP[1].xxxx > 82: MUL TEMP[8].xyz, TEMP[10].xxxx, CONST[1][30].xyzz > 83: FMA TEMP[5].xyz, CONST[1][30].xyzz, TEMP[10].xxxx, TEMP[5].xxxx > 84: FMA TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].xxxx, TEMP[9].xxxx > 85: FSEQ TEMP[9].xyz, TEMP[5].xyzz, IMM[5].xxxx > 86: SSG TEMP[10].xyz, TEMP[7].xyzz > 87: MUL TEMP[10].xyz, IMM[5].zzzz, TEMP[10].xyzz > 88: RCP TEMP[12].x, TEMP[5].xxxx > 89: RCP TEMP[12].y, TEMP[5].yyyy > 90: RCP TEMP[12].z, TEMP[5].zzzz > 91: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[12].xyzz > 92: UCMP TEMP[7].xyz, TEMP[9].xyzz, TEMP[10].xyzz, TEMP[7].xyzz > 93: MUL TEMP[8].xyz, TEMP[11].xxxx, -TEMP[5].xyzz > 94: ABS TEMP[2].xyz, TEMP[2].xxxx > 95: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[5].xyzz > 96: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[5].wwww > 97: EX2 TEMP[2].x, TEMP[1].xxxx > 98: EX2 TEMP[2].y, TEMP[1].yyyy > 99: EX2 TEMP[2].z, TEMP[1].zzzz >100: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[5].wwww >101: LG2 TEMP[5].x, CONST[1][32].xxxx >102: LG2 TEMP[5].y, CONST[1][32].yyyy >103: LG2 TEMP[5].z, CONST[1][32].zzzz >104: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[7].yyyy 
>105: EX2 TEMP[9].x, TEMP[5].xxxx >106: EX2 TEMP[9].y, TEMP[5].yyyy >107: EX2 TEMP[9].z, TEMP[5].zzzz >108: EX2 TEMP[5].x, TEMP[8].xxxx >109: EX2 TEMP[5].y, TEMP[8].yyyy >110: EX2 TEMP[5].z, TEMP[8].zzzz >111: MUL TEMP[8].xyz, TEMP[5].xyzz, TEMP[9].xyzz >112: MUL TEMP[0].xyz, TEMP[7].xyzz, TEMP[8].xyzz >113: ADD TEMP[5].xyz, -TEMP[2].xyzz, IMM[0].xxxx >114: MOV TEMP[2].w, TEMP[2].xxxx >115: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz, IMM[7].zzzz >116: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx >117: FMA TEMP[5].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[7].xxxx >118: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[5].xyzz >119: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[7].wwww, IMM[8].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[8].yyyy >121: FSEQ TEMP[5].xyz, TEMP[0].xyzz, IMM[5].xxxx >122: SSG TEMP[7].xyz, TEMP[1].xyzz >123: MUL TEMP[7].xyz, IMM[5].zzzz, TEMP[7].xyzz >124: RCP TEMP[8].x, TEMP[0].xxxx >125: RCP TEMP[8].y, TEMP[0].yyyy >126: RCP TEMP[8].z, TEMP[0].zzzz >127: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[8].xyzz >128: UCMP TEMP[2].xyz, TEMP[5].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >129: MOV OUT[4], IN[2] >130: MOV OUT[3], TEMP[2] >131: MOV OUT[2], TEMP[6] >132: MOV OUT[1], TEMP[4] >133: MOV OUT[0], TEMP[3] >134: END >radeonsi: Compiling shader 353 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %44 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 432) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 > %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %13) > %80 = extractelement <4 x float> %79, i32 0 > %81 = extractelement <4 x float> %79, i32 1 > %82 = extractelement <4 x float> %79, i32 2 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %14) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %15) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = extractelement <4 x float> %90, i32 3 > %95 = fmul float %18, %80 > %96 = fmul float %19, %81 > %97 = fadd float %95, %96 > %98 = fmul float %20, %82 > %99 = fadd float %97, %98 > %100 = fadd float %99, %21 > %101 = fmul float %22, %80 > %102 = fmul float %23, %81 > %103 = fadd float %101, %102 > %104 = fmul float %24, %82 > %105 = fadd float %103, %104 > %106 = fadd float %105, %25 > %107 = fmul 
float %26, %80 > %108 = fmul float %27, %81 > %109 = fadd float %107, %108 > %110 = fmul float %28, %82 > %111 = fadd float %109, %110 > %112 = fadd float %111, %29 > %113 = fmul float %30, %80 > %114 = fmul float %31, %81 > %115 = fadd float %113, %114 > %116 = fmul float %32, %82 > %117 = fadd float %115, %116 > %118 = fadd float %117, %33 > %119 = fmul float %37, %80 > %120 = fmul float %38, %81 > %121 = fadd float %119, %120 > %122 = fmul float %39, %82 > %123 = fadd float %121, %122 > %124 = fadd float %123, %40 > %125 = fadd float %124, %59 > %126 = fsub float %74, %80 > %127 = fsub float %75, %81 > %128 = fsub float %76, %82 > %129 = fmul float %67, %71 > %130 = fmul float %68, %72 > %131 = fmul float %69, %73 > %132 = fmul float %34, %126 > %133 = fmul float %35, %127 > %134 = fadd float %133, %132 > %135 = fmul float %36, %128 > %136 = fadd float %134, %135 > %137 = fmul float %41, %126 > %138 = fmul float %42, %127 > %139 = fadd float %138, %137 > %140 = fmul float %43, %128 > %141 = fadd float %139, %140 > %142 = fmul float %37, %126 > %143 = fmul float %38, %127 > %144 = fadd float %143, %142 > %145 = fmul float %39, %128 > %146 = fadd float %144, %145 > %147 = fmul float %136, %136 > %148 = fmul float %146, %146 > %149 = fadd float %148, %147 > %150 = fmul float %141, %141 > %151 = fadd float %149, %150 > %152 = call float @llvm.AMDGPU.rsq.clamped.f32(float %151) > %153 = fmul float %152, %136 > %154 = fmul float %152, %146 > %155 = fmul float %152, %141 > %156 = fsub float -0.000000e+00, %146 > %157 = call float @llvm.fma.f32(float %156, float %152, float 0xBFC3333340000000) > %158 = fsub float 1.000000e+00, %157 > %159 = call float @llvm.AMDGPU.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) > %160 = fmul float %159, %159 > %161 = fmul float %153, %64 > %162 = fsub float -0.000000e+00, %161 > %163 = fmul float %154, %65 > %164 = fsub float %162, %163 > %165 = fmul float %155, %66 > %166 = fsub float %164, %165 > %167 = fsub float -0.000000e+00, %49 > %168 = call float @llvm.fma.f32(float %167, float %166, float %48) > %169 = call float @llvm.fma.f32(float %166, float %166, float 1.000000e+00) > %170 = call float @llvm.fabs.f32(float %168) > %171 = call float @llvm.log2.f32(float %170) > %172 = fmul float %169, 0x3FAE8EC8A0000000 > %173 = fmul float %171, -1.500000e+00 > %174 = call float @llvm.exp2.f32(float %173) > %175 = fsub float -0.000000e+00, %46 > %176 = call float @llvm.fma.f32(float %50, float %174, float %175) > %177 = fmul float %174, %50 > %178 = call float @llvm.maxnum.f32(float %176, float 0.000000e+00) > %179 = call float @llvm.fabs.f32(float %118) > %180 = fmul float %179, 0x3EF4F8B580000000 > %181 = call float @llvm.minnum.f32(float %180, float 1.000000e+00) > %182 = fsub float 1.000000e+00, %181 > %183 = fsub float -0.000000e+00, %178 > %184 = call float @llvm.fma.f32(float %183, float %182, float %177) > %185 = call float @llvm.maxnum.f32(float %184, float %63) > %186 = fcmp une float %44, 0.000000e+00 > br i1 %186, label %IF, label %ELSE > >IF: ; preds = %main_body > %187 = fdiv float 1.000000e+00, %44 > %188 = fmul float %125, %187 > %189 = fsub float -0.000000e+00, %188 > br label %ENDIF > >ELSE: ; preds = %main_body > %190 = fsub float -0.000000e+00, %125 > %191 = fcmp olt float %125, -0.000000e+00 > %192 = select i1 %191, float 1.000000e+00, float %190 > %193 = fcmp oge float %192, 0.000000e+00 > %.op = fmul float %192, 0x4600000000000000 > %194 = select i1 %193, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = 
%ELSE, %IF > %temp40.0 = phi float [ %189, %IF ], [ %194, %ELSE ] > %195 = fmul float %temp40.0, 0x3FF7154760000000 > %196 = call float @llvm.exp2.f32(float %195) > %197 = fadd float %196, %51 > %198 = fmul float %197, %53 > %199 = fmul float %198, 5.000000e-01 > %200 = fmul float %160, %199 > %201 = call float @llvm.minnum.f32(float %200, float %47) > %202 = call float @llvm.maxnum.f32(float %201, float %52) > %203 = fmul float %202, %185 > %204 = fcmp une float %57, 0.000000e+00 > br i1 %204, label %IF53, label %ELSE54 > >IF53: ; preds = %ENDIF > %205 = fdiv float 1.000000e+00, %57 > %206 = fmul float %125, %205 > %207 = fsub float -0.000000e+00, %206 > br label %ENDIF52 > >ELSE54: ; preds = %ENDIF > %208 = fsub float -0.000000e+00, %125 > %209 = fcmp olt float %125, -0.000000e+00 > %210 = select i1 %209, float 1.000000e+00, float %208 > %211 = fcmp oge float %210, 0.000000e+00 > %.op58 = fmul float %210, 0x4600000000000000 > %212 = select i1 %211, float %.op58, float 0xC600000000000000 > br label %ENDIF52 > >ENDIF52: ; preds = %ELSE54, %IF53 > %temp40.1 = phi float [ %207, %IF53 ], [ %212, %ELSE54 ] > %213 = fsub float %58, %125 > %214 = fcmp une float %45, 0.000000e+00 > br i1 %214, label %IF56, label %ELSE57 > >IF56: ; preds = %ENDIF52 > %215 = fdiv float 1.000000e+00, %45 > %216 = fmul float %213, %215 > br label %ENDIF55 > >ELSE57: ; preds = %ENDIF52 > %217 = fcmp ogt float %213, 0.000000e+00 > %218 = select i1 %217, float 1.000000e+00, float %213 > %219 = fcmp oge float %218, 0.000000e+00 > %.op59 = fmul float %218, 0x4600000000000000 > %220 = select i1 %219, float %.op59, float 0xC600000000000000 > br label %ENDIF55 > >ENDIF55: ; preds = %ELSE57, %IF56 > %temp44.0 = phi float [ %216, %IF56 ], [ %220, %ELSE57 ] > %221 = fmul float %temp40.1, 0x3FF7154760000000 > %222 = call float @llvm.exp2.f32(float %221) > %223 = fmul float %222, %54 > %224 = fmul float %222, %55 > %225 = fmul float %222, %56 > %226 = call float @llvm.fma.f32(float %54, float %222, float %202) > %227 = call float @llvm.fma.f32(float %55, float %222, float %202) > %228 = call float @llvm.fma.f32(float %56, float %222, float %202) > %229 = call float @llvm.fma.f32(float %223, float %172, float %203) > %230 = call float @llvm.fma.f32(float %224, float %172, float %203) > %231 = call float @llvm.fma.f32(float %225, float %172, float %203) > %232 = fcmp oeq float %226, 0.000000e+00 > %233 = fcmp oeq float %227, 0.000000e+00 > %234 = fcmp oeq float %228, 0.000000e+00 > %235 = fcmp ogt float %229, 0.000000e+00 > %236 = select i1 %235, float 1.000000e+00, float %229 > %237 = fcmp oge float %236, 0.000000e+00 > %238 = fcmp ogt float %230, 0.000000e+00 > %239 = select i1 %238, float 1.000000e+00, float %230 > %240 = fcmp oge float %239, 0.000000e+00 > %241 = fcmp ogt float %231, 0.000000e+00 > %242 = select i1 %241, float 1.000000e+00, float %231 > %243 = fcmp oge float %242, 0.000000e+00 > %.op60 = fmul float %236, 0x4600000000000000 > %244 = select i1 %237, float %.op60, float 0xC600000000000000 > %.op61 = fmul float %239, 0x4600000000000000 > %245 = select i1 %240, float %.op61, float 0xC600000000000000 > %.op62 = fmul float %242, 0x4600000000000000 > %246 = select i1 %243, float %.op62, float 0xC600000000000000 > %247 = fdiv float 1.000000e+00, %226 > %248 = fdiv float 1.000000e+00, %227 > %249 = fdiv float 1.000000e+00, %228 > %250 = fmul float %229, %247 > %251 = fmul float %230, %248 > %252 = fmul float %231, %249 > %253 = select i1 %232, float %244, float %250 > %254 = select i1 %233, float %245, float %251 > %255 
= select i1 %234, float %246, float %252 > %256 = fmul float %226, %temp44.0 > %257 = fmul float %227, %temp44.0 > %258 = fmul float %228, %temp44.0 > %259 = call float @llvm.fabs.f32(float %118) > %260 = call float @llvm.fabs.f32(float %118) > %261 = call float @llvm.fabs.f32(float %118) > %262 = fmul float %226, %259 > %263 = fmul float %227, %260 > %264 = fmul float %228, %261 > %265 = fmul float %262, 0xBFF7154760000000 > %266 = fmul float %263, 0xBFF7154760000000 > %267 = fmul float %264, 0xBFF7154760000000 > %268 = call float @llvm.exp2.f32(float %265) > %269 = call float @llvm.exp2.f32(float %266) > %270 = call float @llvm.exp2.f32(float %267) > %271 = fmul float %256, 0xBFF7154760000000 > %272 = fmul float %257, 0xBFF7154760000000 > %273 = fmul float %258, 0xBFF7154760000000 > %274 = call float @llvm.log2.f32(float %60) > %275 = call float @llvm.log2.f32(float %61) > %276 = call float @llvm.log2.f32(float %62) > %277 = fmul float %274, 0x3FDD1745E0000000 > %278 = fmul float %275, 0x3FDD1745E0000000 > %279 = fmul float %276, 0x3FDD1745E0000000 > %280 = call float @llvm.exp2.f32(float %277) > %281 = call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %271) > %284 = call float @llvm.exp2.f32(float %272) > %285 = call float @llvm.exp2.f32(float %273) > %286 = fmul float %283, %280 > %287 = fmul float %284, %281 > %288 = fmul float %285, %282 > %289 = fmul float %253, %286 > %290 = fmul float %254, %287 > %291 = fmul float %255, %288 > %292 = fsub float 1.000000e+00, %268 > %293 = fsub float 1.000000e+00, %269 > %294 = fsub float 1.000000e+00, %270 > %295 = call float @llvm.fma.f32(float %289, float %292, float 0xBF70624DE0000000) > %296 = call float @llvm.fma.f32(float %290, float %293, float 0xBF70624DE0000000) > %297 = call float @llvm.fma.f32(float %291, float %294, float 0xBF70624DE0000000) > %298 = call float @llvm.maxnum.f32(float %295, float 0.000000e+00) > %299 = call float @llvm.maxnum.f32(float %296, float 0.000000e+00) > %300 = call float @llvm.maxnum.f32(float %297, float 0.000000e+00) > %301 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 5.000000e-01) > %302 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 5.000000e-01) > %303 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 5.000000e-01) > %304 = fmul float %298, %301 > %305 = fmul float %299, %302 > %306 = fmul float %300, %303 > %307 = call float @llvm.fma.f32(float %298, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %308 = call float @llvm.fma.f32(float %299, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %309 = call float @llvm.fma.f32(float %300, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %310 = call float @llvm.fma.f32(float %298, float %307, float 0x3FAEB851E0000000) > %311 = call float @llvm.fma.f32(float %299, float %308, float 0x3FAEB851E0000000) > %312 = call float @llvm.fma.f32(float %300, float %309, float 0x3FAEB851E0000000) > %313 = fcmp oeq float %310, 0.000000e+00 > %314 = fcmp oeq float %311, 0.000000e+00 > %315 = fcmp oeq float %312, 0.000000e+00 > %316 = fcmp ogt float %304, 0.000000e+00 > %317 = select i1 %316, float 1.000000e+00, float %304 > %318 = fcmp oge float %317, 0.000000e+00 > %319 = fcmp ogt float %305, 0.000000e+00 > %320 = select i1 %319, float 1.000000e+00, float %305 > %321 = fcmp oge float %320, 0.000000e+00 > %322 = fcmp ogt float %306, 0.000000e+00 > %323 = select i1 %322, float 1.000000e+00, float %306 > %324 = fcmp 
oge float %323, 0.000000e+00 > %.op63 = fmul float %317, 0x4600000000000000 > %325 = select i1 %318, float %.op63, float 0xC600000000000000 > %.op64 = fmul float %320, 0x4600000000000000 > %326 = select i1 %321, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %323, 0x4600000000000000 > %327 = select i1 %324, float %.op65, float 0xC600000000000000 > %328 = fdiv float 1.000000e+00, %310 > %329 = fdiv float 1.000000e+00, %311 > %330 = fdiv float 1.000000e+00, %312 > %331 = fmul float %304, %328 > %332 = fmul float %305, %329 > %333 = fmul float %306, %330 > %334 = select i1 %313, float %325, float %331 > %335 = select i1 %314, float %326, float %332 > %336 = select i1 %315, float %327, float %333 > %337 = bitcast i32 %11 to float > %338 = insertvalue <{ float, float, float }> undef, float %337, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %129, float %130, float %131, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %334, float %335, float %336, float %268) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %91, float %92, float %93, float %94) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %100, float %106, float %112, float %118) > ret <{ float, float, float }> %338 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], BUFFER, FLOAT >DCL CONST[1][0..19] >DCL TEMP[0..3], LOCAL >IMM[0] UINT32 {0, 256, 304, 288} >IMM[1] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {272, 240, 0, 0} >IMM[3] INT32 {0, 0, 0, 0} > 0: MUL TEMP[0].xy, CONST[1][16].zwww, CONST[1][19].xxxx > 1: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][18].yzzz, TEMP[0].xyyy > 2: MOV TEMP[1].xy, TEMP[0].xyyy > 3: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 4: FMA TEMP[0].xy, 
TEMP[1].ywww, IMM[1].xxxx, IMM[1].yyyy > 5: FMA TEMP[1].xy, TEMP[0].xyyy, CONST[1][17].xxxx, IN[0].xyyy > 6: MUL TEMP[2].x, TEMP[1].yyyy, CONST[1][18].xxxx > 7: MOV TEMP[2].w, TEMP[2].xxxx > 8: MUL TEMP[2].xyz, TEMP[1].xyxx, CONST[1][17].yzww > 9: FMA TEMP[0].xy, CONST[1][19].xxxx, CONST[1][16].xyyy, TEMP[2].zwww > 10: FMA TEMP[1].xy, CONST[1][19].xxxx, CONST[1][15].zwww, TEMP[2].xyyy > 11: MOV TEMP[1].xy, TEMP[1].xyyy > 12: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 13: MOV TEMP[3].xy, TEMP[0].xyyy > 14: TEX TEMP[3], TEMP[3], SAMP[2], 2D > 15: ADD TEMP[0].xyz, -TEMP[1].xyzz, TEMP[3].xyzz > 16: MUL TEMP[3].x, TEMP[3].wwww, TEMP[1].wwww > 17: FMA TEMP[2].xyz, CONST[1][18].wwww, TEMP[0].xyzz, TEMP[1].xyzz > 18: MOV TEMP[1].xy, IN[0].xyyy > 19: TEX TEMP[1].w, TEMP[1], SAMP[3], 2D > 20: MUL TEMP[1].x, TEMP[1].wwww, TEMP[3].xxxx > 21: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 22: MOV TEMP[2].w, TEMP[1].xxxx > 23: MUL TEMP[0], TEMP[2], IN[3] > 24: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xyzz > 25: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww > 26: MOV TEMP[1].w, TEMP[1].xxxx > 27: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 28: MOV TEMP[2].x, IMM[3].xxxx > 29: MOV TEMP[2].w, IMM[0].xxxx > 30: TXF TEMP[2].x, TEMP[2], SAMP[4], BUFFER > 31: MUL TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz > 32: MOV OUT[0], TEMP[1] > 33: END >radeonsi: Compiling shader 354 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %40 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 > %42 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %42, i64 0, i64 3 > %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 > %45 = extractelement <8 x i32> %41, i32 7 > %46 = 
extractelement <4 x i32> %44, i32 0 > %47 = and i32 %46, %45 > %48 = insertelement <4 x i32> %44, i32 %47, i32 0 > %49 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 > %51 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %52 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %51, i64 0, i64 7 > %53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0 > %54 = extractelement <8 x i32> %50, i32 7 > %55 = extractelement <4 x i32> %53, i32 0 > %56 = and i32 %55, %54 > %57 = insertelement <4 x i32> %53, i32 %56, i32 0 > %58 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 > %60 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %61 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %60, i64 0, i64 11 > %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 > %63 = extractelement <8 x i32> %59, i32 7 > %64 = extractelement <4 x i32> %62, i32 0 > %65 = and i32 %64, %63 > %66 = insertelement <4 x i32> %62, i32 %65, i32 0 > %67 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 > %69 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %70 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %69, i64 0, i64 15 > %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0 > %72 = extractelement <8 x i32> %68, i32 7 > %73 = extractelement <4 x i32> %71, i32 0 > %74 = and i32 %73, %72 > %75 = insertelement <4 x i32> %71, i32 %74, i32 0 > %76 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %77 = bitcast <8 x i32> addrspace(2)* %76 to <2 x i128> addrspace(2)* > %78 = load <2 x i128>, <2 x i128> addrspace(2)* %77, align 32, !tbaa !0 > %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %90 = fmul float %29, %39 > %91 = fmul float %30, %39 > %92 = call float @llvm.fma.f32(float %79, float %36, float %90) > %93 = call float @llvm.fma.f32(float %80, float %37, float %91) > %94 = bitcast float %92 to i32 > %95 = bitcast float %93 to i32 > %96 = insertelement <2 x i32> undef, i32 %94, i32 0 > %97 = insertelement <2 x i32> %96, i32 %95, i32 1 > %98 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %97, <8 x i32> %41, <4 x i32> %48, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %99 = extractelement <4 x float> %98, i32 1 > %100 = extractelement <4 x float> %98, i32 3 > %101 = call float @llvm.fma.f32(float %99, float 2.000000e+00, float 
-1.000000e+00) > %102 = call float @llvm.fma.f32(float %100, float 2.000000e+00, float -1.000000e+00) > %103 = call float @llvm.fma.f32(float %101, float %31, float %79) > %104 = call float @llvm.fma.f32(float %102, float %31, float %80) > %105 = fmul float %104, %35 > %106 = fmul float %103, %32 > %107 = fmul float %104, %33 > %108 = fmul float %103, %34 > %109 = call float @llvm.fma.f32(float %39, float %27, float %108) > %110 = call float @llvm.fma.f32(float %39, float %28, float %105) > %111 = call float @llvm.fma.f32(float %39, float %25, float %106) > %112 = call float @llvm.fma.f32(float %39, float %26, float %107) > %113 = bitcast float %111 to i32 > %114 = bitcast float %112 to i32 > %115 = insertelement <2 x i32> undef, i32 %113, i32 0 > %116 = insertelement <2 x i32> %115, i32 %114, i32 1 > %117 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %116, <8 x i32> %50, <4 x i32> %57, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %118 = extractelement <4 x float> %117, i32 0 > %119 = extractelement <4 x float> %117, i32 1 > %120 = extractelement <4 x float> %117, i32 2 > %121 = extractelement <4 x float> %117, i32 3 > %122 = bitcast float %109 to i32 > %123 = bitcast float %110 to i32 > %124 = insertelement <2 x i32> undef, i32 %122, i32 0 > %125 = insertelement <2 x i32> %124, i32 %123, i32 1 > %126 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %125, <8 x i32> %59, <4 x i32> %66, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %127 = extractelement <4 x float> %126, i32 0 > %128 = extractelement <4 x float> %126, i32 1 > %129 = extractelement <4 x float> %126, i32 2 > %130 = extractelement <4 x float> %126, i32 3 > %131 = fsub float %127, %118 > %132 = fsub float %128, %119 > %133 = fsub float %129, %120 > %134 = fmul float %130, %121 > %135 = call float @llvm.fma.f32(float %38, float %131, float %118) > %136 = call float @llvm.fma.f32(float %38, float %132, float %119) > %137 = call float @llvm.fma.f32(float %38, float %133, float %120) > %138 = bitcast float %79 to i32 > %139 = bitcast float %80 to i32 > %140 = insertelement <2 x i32> undef, i32 %138, i32 0 > %141 = insertelement <2 x i32> %140, i32 %139, i32 1 > %142 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %141, <8 x i32> %68, <4 x i32> %75, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %143 = extractelement <4 x float> %142, i32 3 > %144 = fmul float %143, %134 > %145 = call float @llvm.AMDGPU.clamp.(float %144, float 0.000000e+00, float 1.000000e+00) > %146 = fmul float %135, %86 > %147 = fmul float %136, %87 > %148 = fmul float %137, %88 > %149 = fmul float %145, %89 > %150 = fmul float %146, %81 > %151 = fmul float %147, %82 > %152 = fmul float %148, %83 > %153 = fmul float %149, %84 > %154 = fmul float %150, %85 > %155 = fmul float %151, %85 > %156 = fmul float %152, %85 > %157 = extractelement <2 x i128> %78, i32 1 > %158 = bitcast i128 %157 to <16 x i8> > %159 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %158, i32 0, i32 0) > %160 = extractelement <4 x float> %159, i32 0 > %161 = fmul float %160, %154 > %162 = fmul float %160, %155 > %163 = fmul float %160, %156 > %164 = bitcast float %5 to i32 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %164, 10 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, 
float, float, float }> %165, float %161, 11 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %162, 12 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %163, 13 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %153, 14 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL OUT[6], GENERIC[5] >DCL OUT[7], GENERIC[6] >DCL CONST[1][0..36] >DCL TEMP[0..15], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 288, 304, 320} >IMM[2] UINT32 {336, 368, 464, 576} >IMM[3] UINT32 {560, 528, 544, 352} >IMM[4] UINT32 {384, 496, 416, 400} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {480, 432, 448, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][18], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][23], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][29].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xyz, IN[1].xyzx > 13: MOV TEMP[5].xy, IN[2].xyxx > 14: MUL TEMP[6].xy, CONST[1][36].xyyy, IMM[0].xyyy > 15: MUL TEMP[7].xy, TEMP[2].xxxx, CONST[1][36].xyyy > 16: FMA TEMP[6].xy, TEMP[1].xyyy, TEMP[6].xyyy, TEMP[7].xyyy > 17: MOV TEMP[6].zw, TEMP[1].wwzw > 18: ADD TEMP[8].xyz, -IN[0].xyzz, CONST[1][35].xyzz > 19: MOV TEMP[9].xyz, TEMP[8].xyzx > 20: MUL TEMP[10].xyz, CONST[1][33].xyzz, CONST[1][34].xyzz > 21: MOV TEMP[10].w, 
CONST[1][33].wwww > 22: DP3 TEMP[1].x, CONST[1][22].xyzz, TEMP[8].xyzz > 23: DP3 TEMP[11].x, CONST[1][24].xyzz, TEMP[8].xyzz > 24: MOV TEMP[1].z, TEMP[11].xxxx > 25: DP3 TEMP[8].x, CONST[1][23].xyzz, TEMP[8].xyzz > 26: MOV TEMP[1].y, TEMP[8].xxxx > 27: DP3 TEMP[11].x, TEMP[1].xyzz, TEMP[1].xyzz > 28: RSQ TEMP[11].x, TEMP[11].xxxx > 29: MUL TEMP[7].xyz, TEMP[11].xxxx, TEMP[1].xyzz > 30: FMA TEMP[8].x, -TEMP[8].xxxx, TEMP[11].xxxx, IMM[0].zzzz > 31: ADD TEMP[8].x, -TEMP[8].xxxx, IMM[0].xxxx > 32: MOV_SAT TEMP[8].x, TEMP[8].xxxx > 33: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx > 34: DP3 TEMP[11].x, -TEMP[7].xyzz, CONST[1][31].xyzz > 35: FMA TEMP[12].x, -CONST[1][26].yyyy, TEMP[11].xxxx, CONST[1][26].xxxx > 36: FMA TEMP[11].x, TEMP[11].xxxx, TEMP[11].xxxx, IMM[0].xxxx > 37: MOV TEMP[0].z, TEMP[11].xxxx > 38: ABS TEMP[11].x, TEMP[12].xxxx > 39: LG2 TEMP[11].x, TEMP[11].xxxx > 40: MOV TEMP[0].w, TEMP[11].xxxx > 41: MUL TEMP[11].xy, TEMP[0].zwww, IMM[5].xyyy > 42: EX2 TEMP[12].x, TEMP[11].yyyy > 43: FMA TEMP[1].x, CONST[1][26].zzzz, TEMP[12].xxxx, -CONST[1][25].zzzz > 44: MUL TEMP[12].x, TEMP[12].xxxx, CONST[1][26].zzzz > 45: MAX TEMP[13].x, TEMP[1].xxxx, IMM[0].wwww > 46: ABS TEMP[14].x, TEMP[2].xxxx > 47: MUL TEMP[14].x, TEMP[14].xxxx, IMM[5].zzzz > 48: MIN TEMP[14].x, TEMP[14].xxxx, IMM[0].xxxx > 49: ADD TEMP[14].x, -TEMP[14].xxxx, IMM[0].xxxx > 50: FMA TEMP[12].x, -TEMP[13].xxxx, TEMP[14].xxxx, TEMP[12].xxxx > 51: MAX TEMP[12].x, TEMP[12].xxxx, CONST[1][30].wwww > 52: FSNE TEMP[13].x, CONST[1][25].xxxx, IMM[0].wwww > 53: UIF TEMP[13].xxxx :0 > 54: RCP TEMP[13].x, CONST[1][25].xxxx > 55: MUL TEMP[13].x, -TEMP[0].xxxx, TEMP[13].xxxx > 56: ELSE :0 > 57: SSG TEMP[14].x, -TEMP[0].xxxx > 58: MUL TEMP[13].x, IMM[5].wwww, TEMP[14].xxxx > 59: ENDIF > 60: MUL TEMP[1].x, TEMP[13].xxxx, IMM[7].xxxx > 61: EX2 TEMP[13].x, TEMP[1].xxxx > 62: ADD TEMP[1].x, TEMP[13].xxxx, CONST[1][26].wwww > 63: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][27].yyyy > 64: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 65: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[1].xxxx > 66: MIN TEMP[8].x, TEMP[8].xxxx, CONST[1][25].wwww > 67: MAX TEMP[8].x, TEMP[8].xxxx, CONST[1][27].xxxx > 68: MUL TEMP[12].x, TEMP[8].xxxx, TEMP[12].xxxx > 69: FSNE TEMP[13].x, CONST[1][28].wwww, IMM[0].wwww > 70: UIF TEMP[13].xxxx :0 > 71: RCP TEMP[13].x, CONST[1][28].wwww > 72: MUL TEMP[13].x, -TEMP[0].xxxx, TEMP[13].xxxx > 73: ELSE :0 > 74: SSG TEMP[14].x, -TEMP[0].xxxx > 75: MUL TEMP[13].x, IMM[5].wwww, TEMP[14].xxxx > 76: ENDIF > 77: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][29].zzzz > 78: FSNE TEMP[14].x, CONST[1][25].yyyy, IMM[0].wwww > 79: UIF TEMP[14].xxxx :0 > 80: RCP TEMP[14].x, CONST[1][25].yyyy > 81: MUL TEMP[14].x, TEMP[0].xxxx, TEMP[14].xxxx > 82: ELSE :0 > 83: SSG TEMP[15].x, TEMP[0].xxxx > 84: MUL TEMP[14].x, IMM[5].wwww, TEMP[15].xxxx > 85: ENDIF > 86: MUL TEMP[1].x, TEMP[13].xxxx, IMM[7].xxxx > 87: EX2 TEMP[13].x, TEMP[1].xxxx > 88: MUL TEMP[7].xyz, TEMP[13].xxxx, CONST[1][28].xyzz > 89: FMA TEMP[8].xyz, CONST[1][28].xyzz, TEMP[13].xxxx, TEMP[8].xxxx > 90: FMA TEMP[11].xyz, TEMP[7].xyzz, TEMP[11].xxxx, TEMP[12].xxxx > 91: FSEQ TEMP[12].xyz, TEMP[8].xyzz, IMM[0].wwww > 92: SSG TEMP[13].xyz, TEMP[11].xyzz > 93: MUL TEMP[13].xyz, IMM[5].wwww, TEMP[13].xyzz > 94: RCP TEMP[15].x, TEMP[8].xxxx > 95: RCP TEMP[15].y, TEMP[8].yyyy > 96: RCP TEMP[15].z, TEMP[8].zzzz > 97: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[15].xyzz > 98: UCMP TEMP[11].xyz, TEMP[12].xyzz, TEMP[13].xyzz, TEMP[11].xyzz > 99: MUL TEMP[7].xyz, TEMP[14].xxxx, -TEMP[8].xyzz >100: ABS TEMP[2].xyz, 
TEMP[2].xxxx >101: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[8].xyzz >102: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >103: EX2 TEMP[2].x, TEMP[1].xxxx >104: EX2 TEMP[2].y, TEMP[1].yyyy >105: EX2 TEMP[2].z, TEMP[1].zzzz >106: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].xxxx >107: LG2 TEMP[8].x, CONST[1][30].xxxx >108: LG2 TEMP[8].y, CONST[1][30].yyyy >109: LG2 TEMP[8].z, CONST[1][30].zzzz >110: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[7].zzzz >111: EX2 TEMP[12].x, TEMP[8].xxxx >112: EX2 TEMP[12].y, TEMP[8].yyyy >113: EX2 TEMP[12].z, TEMP[8].zzzz >114: EX2 TEMP[8].x, TEMP[7].xxxx >115: EX2 TEMP[8].y, TEMP[7].yyyy >116: EX2 TEMP[8].z, TEMP[7].zzzz >117: MUL TEMP[7].xyz, TEMP[8].xyzz, TEMP[12].xyzz >118: MUL TEMP[0].xyz, TEMP[11].xyzz, TEMP[7].xyzz >119: ADD TEMP[8].xyz, -TEMP[2].xyzz, IMM[0].xxxx >120: MOV TEMP[2].w, TEMP[2].xxxx >121: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz, IMM[7].wwww >122: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >123: FMA TEMP[8].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >124: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[8].xyzz >125: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >126: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[8].zzzz >127: FSEQ TEMP[7].xyz, TEMP[0].xyzz, IMM[0].wwww >128: SSG TEMP[8].xyz, TEMP[1].xyzz >129: MUL TEMP[8].xyz, IMM[5].wwww, TEMP[8].xyzz >130: RCP TEMP[11].x, TEMP[0].xxxx >131: RCP TEMP[11].y, TEMP[0].yyyy >132: RCP TEMP[11].z, TEMP[0].zzzz >133: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[11].xyzz >134: UCMP TEMP[2].xyz, TEMP[7].xyzz, TEMP[8].xyzz, TEMP[0].xyzz >135: MOV OUT[7], IN[3] >136: MOV OUT[6], TEMP[2] >137: MOV OUT[5], TEMP[10] >138: MOV OUT[4], TEMP[9] >139: MOV OUT[3], TEMP[6] >140: MOV OUT[2], TEMP[5] >141: MOV OUT[1], TEMP[4] >142: MOV OUT[0], TEMP[3] >143: END >radeonsi: Compiling shader 355 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 > %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 288) > %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 292) > %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 296) > %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 300) > %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) > %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) > %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) > %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 316) > %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) > %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) > %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) > %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 332) > %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) > %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) > %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) > %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 348) > %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 352) > %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 356) > %37 = call 
float @llvm.SI.load.const(<16 x i8> %18, i32 360) > %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 368) > %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) > %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 376) > %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 380) > %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 384) > %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 388) > %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 392) > %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 400) > %46 = call float @llvm.SI.load.const(<16 x i8> %18, i32 404) > %47 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) > %48 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) > %49 = call float @llvm.SI.load.const(<16 x i8> %18, i32 416) > %50 = call float @llvm.SI.load.const(<16 x i8> %18, i32 420) > %51 = call float @llvm.SI.load.const(<16 x i8> %18, i32 424) > %52 = call float @llvm.SI.load.const(<16 x i8> %18, i32 428) > %53 = call float @llvm.SI.load.const(<16 x i8> %18, i32 432) > %54 = call float @llvm.SI.load.const(<16 x i8> %18, i32 436) > %55 = call float @llvm.SI.load.const(<16 x i8> %18, i32 448) > %56 = call float @llvm.SI.load.const(<16 x i8> %18, i32 452) > %57 = call float @llvm.SI.load.const(<16 x i8> %18, i32 456) > %58 = call float @llvm.SI.load.const(<16 x i8> %18, i32 460) > %59 = call float @llvm.SI.load.const(<16 x i8> %18, i32 472) > %60 = call float @llvm.SI.load.const(<16 x i8> %18, i32 476) > %61 = call float @llvm.SI.load.const(<16 x i8> %18, i32 480) > %62 = call float @llvm.SI.load.const(<16 x i8> %18, i32 484) > %63 = call float @llvm.SI.load.const(<16 x i8> %18, i32 488) > %64 = call float @llvm.SI.load.const(<16 x i8> %18, i32 492) > %65 = call float @llvm.SI.load.const(<16 x i8> %18, i32 496) > %66 = call float @llvm.SI.load.const(<16 x i8> %18, i32 500) > %67 = call float @llvm.SI.load.const(<16 x i8> %18, i32 504) > %68 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) > %69 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) > %70 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) > %71 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) > %72 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) > %73 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) > %74 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) > %75 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) > %76 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) > %77 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) > %78 = call float @llvm.SI.load.const(<16 x i8> %18, i32 576) > %79 = call float @llvm.SI.load.const(<16 x i8> %18, i32 580) > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %13) > %83 = extractelement <4 x float> %82, i32 0 > %84 = extractelement <4 x float> %82, i32 1 > %85 = extractelement <4 x float> %82, i32 2 > %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 > %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %14) > %89 = extractelement <4 x float> %88, i32 0 > %90 = extractelement <4 x float> %88, i32 1 > %91 = extractelement <4 x float> %88, i32 2 > %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %93 = load <16 
x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 > %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %15) > %95 = extractelement <4 x float> %94, i32 0 > %96 = extractelement <4 x float> %94, i32 1 > %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 > %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %16) > %100 = extractelement <4 x float> %99, i32 0 > %101 = extractelement <4 x float> %99, i32 1 > %102 = extractelement <4 x float> %99, i32 2 > %103 = extractelement <4 x float> %99, i32 3 > %104 = fmul float %19, %83 > %105 = fmul float %20, %84 > %106 = fadd float %104, %105 > %107 = fmul float %21, %85 > %108 = fadd float %106, %107 > %109 = fadd float %108, %22 > %110 = fmul float %23, %83 > %111 = fmul float %24, %84 > %112 = fadd float %110, %111 > %113 = fmul float %25, %85 > %114 = fadd float %112, %113 > %115 = fadd float %114, %26 > %116 = fmul float %27, %83 > %117 = fmul float %28, %84 > %118 = fadd float %116, %117 > %119 = fmul float %29, %85 > %120 = fadd float %118, %119 > %121 = fadd float %120, %30 > %122 = fmul float %31, %83 > %123 = fmul float %32, %84 > %124 = fadd float %122, %123 > %125 = fmul float %33, %85 > %126 = fadd float %124, %125 > %127 = fadd float %126, %34 > %128 = fmul float %38, %83 > %129 = fmul float %39, %84 > %130 = fadd float %128, %129 > %131 = fmul float %40, %85 > %132 = fadd float %130, %131 > %133 = fadd float %132, %41 > %134 = fadd float %133, %60 > %135 = fsub float -0.000000e+00, %79 > %136 = fmul float %127, %78 > %137 = fmul float %127, %79 > %138 = call float @llvm.fma.f32(float %109, float %78, float %136) > %139 = call float @llvm.fma.f32(float %115, float %135, float %137) > %140 = fsub float %75, %83 > %141 = fsub float %76, %84 > %142 = fsub float %77, %85 > %143 = fmul float %68, %72 > %144 = fmul float %69, %73 > %145 = fmul float %70, %74 > %146 = fmul float %35, %140 > %147 = fmul float %36, %141 > %148 = fadd float %147, %146 > %149 = fmul float %37, %142 > %150 = fadd float %148, %149 > %151 = fmul float %42, %140 > %152 = fmul float %43, %141 > %153 = fadd float %152, %151 > %154 = fmul float %44, %142 > %155 = fadd float %153, %154 > %156 = fmul float %38, %140 > %157 = fmul float %39, %141 > %158 = fadd float %157, %156 > %159 = fmul float %40, %142 > %160 = fadd float %158, %159 > %161 = fmul float %150, %150 > %162 = fmul float %160, %160 > %163 = fadd float %162, %161 > %164 = fmul float %155, %155 > %165 = fadd float %163, %164 > %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) > %167 = fmul float %166, %150 > %168 = fmul float %166, %160 > %169 = fmul float %166, %155 > %170 = fsub float -0.000000e+00, %160 > %171 = call float @llvm.fma.f32(float %170, float %166, float 0xBFC3333340000000) > %172 = fsub float 1.000000e+00, %171 > %173 = call float @llvm.AMDGPU.clamp.(float %172, float 0.000000e+00, float 1.000000e+00) > %174 = fmul float %173, %173 > %175 = fmul float %167, %65 > %176 = fsub float -0.000000e+00, %175 > %177 = fmul float %168, %66 > %178 = fsub float %176, %177 > %179 = fmul float %169, %67 > %180 = fsub float %178, %179 > %181 = fsub float -0.000000e+00, %50 > %182 = call float @llvm.fma.f32(float %181, float %180, float %49) > %183 = call float @llvm.fma.f32(float %180, float %180, float 1.000000e+00) > %184 = call float @llvm.fabs.f32(float %182) > %185 = call float @llvm.log2.f32(float %184) > %186 = fmul float %183, 
0x3FAE8EC8A0000000 > %187 = fmul float %185, -1.500000e+00 > %188 = call float @llvm.exp2.f32(float %187) > %189 = fsub float -0.000000e+00, %47 > %190 = call float @llvm.fma.f32(float %51, float %188, float %189) > %191 = fmul float %188, %51 > %192 = call float @llvm.maxnum.f32(float %190, float 0.000000e+00) > %193 = call float @llvm.fabs.f32(float %127) > %194 = fmul float %193, 0x3EF4F8B580000000 > %195 = call float @llvm.minnum.f32(float %194, float 1.000000e+00) > %196 = fsub float 1.000000e+00, %195 > %197 = fsub float -0.000000e+00, %192 > %198 = call float @llvm.fma.f32(float %197, float %196, float %191) > %199 = call float @llvm.maxnum.f32(float %198, float %64) > %200 = fcmp une float %45, 0.000000e+00 > br i1 %200, label %IF, label %ELSE > >IF: ; preds = %main_body > %201 = fdiv float 1.000000e+00, %45 > %202 = fmul float %134, %201 > %203 = fsub float -0.000000e+00, %202 > br label %ENDIF > >ELSE: ; preds = %main_body > %204 = fsub float -0.000000e+00, %134 > %205 = fcmp olt float %134, -0.000000e+00 > %206 = select i1 %205, float 1.000000e+00, float %204 > %207 = fcmp oge float %206, 0.000000e+00 > %.op = fmul float %206, 0x4600000000000000 > %208 = select i1 %207, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp52.0 = phi float [ %203, %IF ], [ %208, %ELSE ] > %209 = fmul float %temp52.0, 0x3FF7154760000000 > %210 = call float @llvm.exp2.f32(float %209) > %211 = fadd float %210, %52 > %212 = fmul float %211, %54 > %213 = fmul float %212, 5.000000e-01 > %214 = fmul float %174, %213 > %215 = call float @llvm.minnum.f32(float %214, float %48) > %216 = call float @llvm.maxnum.f32(float %215, float %53) > %217 = fmul float %216, %199 > %218 = fcmp une float %58, 0.000000e+00 > br i1 %218, label %IF65, label %ELSE66 > >IF65: ; preds = %ENDIF > %219 = fdiv float 1.000000e+00, %58 > %220 = fmul float %134, %219 > %221 = fsub float -0.000000e+00, %220 > br label %ENDIF64 > >ELSE66: ; preds = %ENDIF > %222 = fsub float -0.000000e+00, %134 > %223 = fcmp olt float %134, -0.000000e+00 > %224 = select i1 %223, float 1.000000e+00, float %222 > %225 = fcmp oge float %224, 0.000000e+00 > %.op70 = fmul float %224, 0x4600000000000000 > %226 = select i1 %225, float %.op70, float 0xC600000000000000 > br label %ENDIF64 > >ENDIF64: ; preds = %ELSE66, %IF65 > %temp52.1 = phi float [ %221, %IF65 ], [ %226, %ELSE66 ] > %227 = fsub float %59, %134 > %228 = fcmp une float %46, 0.000000e+00 > br i1 %228, label %IF68, label %ELSE69 > >IF68: ; preds = %ENDIF64 > %229 = fdiv float 1.000000e+00, %46 > %230 = fmul float %227, %229 > br label %ENDIF67 > >ELSE69: ; preds = %ENDIF64 > %231 = fcmp ogt float %227, 0.000000e+00 > %232 = select i1 %231, float 1.000000e+00, float %227 > %233 = fcmp oge float %232, 0.000000e+00 > %.op71 = fmul float %232, 0x4600000000000000 > %234 = select i1 %233, float %.op71, float 0xC600000000000000 > br label %ENDIF67 > >ENDIF67: ; preds = %ELSE69, %IF68 > %temp56.0 = phi float [ %230, %IF68 ], [ %234, %ELSE69 ] > %235 = fmul float %temp52.1, 0x3FF7154760000000 > %236 = call float @llvm.exp2.f32(float %235) > %237 = fmul float %236, %55 > %238 = fmul float %236, %56 > %239 = fmul float %236, %57 > %240 = call float @llvm.fma.f32(float %55, float %236, float %216) > %241 = call float @llvm.fma.f32(float %56, float %236, float %216) > %242 = call float @llvm.fma.f32(float %57, float %236, float %216) > %243 = call float @llvm.fma.f32(float %237, float %186, float %217) > %244 = call float @llvm.fma.f32(float %238, float %186, float 
%217) > %245 = call float @llvm.fma.f32(float %239, float %186, float %217) > %246 = fcmp oeq float %240, 0.000000e+00 > %247 = fcmp oeq float %241, 0.000000e+00 > %248 = fcmp oeq float %242, 0.000000e+00 > %249 = fcmp ogt float %243, 0.000000e+00 > %250 = select i1 %249, float 1.000000e+00, float %243 > %251 = fcmp oge float %250, 0.000000e+00 > %252 = fcmp ogt float %244, 0.000000e+00 > %253 = select i1 %252, float 1.000000e+00, float %244 > %254 = fcmp oge float %253, 0.000000e+00 > %255 = fcmp ogt float %245, 0.000000e+00 > %256 = select i1 %255, float 1.000000e+00, float %245 > %257 = fcmp oge float %256, 0.000000e+00 > %.op72 = fmul float %250, 0x4600000000000000 > %258 = select i1 %251, float %.op72, float 0xC600000000000000 > %.op73 = fmul float %253, 0x4600000000000000 > %259 = select i1 %254, float %.op73, float 0xC600000000000000 > %.op74 = fmul float %256, 0x4600000000000000 > %260 = select i1 %257, float %.op74, float 0xC600000000000000 > %261 = fdiv float 1.000000e+00, %240 > %262 = fdiv float 1.000000e+00, %241 > %263 = fdiv float 1.000000e+00, %242 > %264 = fmul float %243, %261 > %265 = fmul float %244, %262 > %266 = fmul float %245, %263 > %267 = select i1 %246, float %258, float %264 > %268 = select i1 %247, float %259, float %265 > %269 = select i1 %248, float %260, float %266 > %270 = fmul float %240, %temp56.0 > %271 = fmul float %241, %temp56.0 > %272 = fmul float %242, %temp56.0 > %273 = call float @llvm.fabs.f32(float %127) > %274 = call float @llvm.fabs.f32(float %127) > %275 = call float @llvm.fabs.f32(float %127) > %276 = fmul float %240, %273 > %277 = fmul float %241, %274 > %278 = fmul float %242, %275 > %279 = fmul float %276, 0xBFF7154760000000 > %280 = fmul float %277, 0xBFF7154760000000 > %281 = fmul float %278, 0xBFF7154760000000 > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %280) > %284 = call float @llvm.exp2.f32(float %281) > %285 = fmul float %270, 0xBFF7154760000000 > %286 = fmul float %271, 0xBFF7154760000000 > %287 = fmul float %272, 0xBFF7154760000000 > %288 = call float @llvm.log2.f32(float %61) > %289 = call float @llvm.log2.f32(float %62) > %290 = call float @llvm.log2.f32(float %63) > %291 = fmul float %288, 0x3FDD1745E0000000 > %292 = fmul float %289, 0x3FDD1745E0000000 > %293 = fmul float %290, 0x3FDD1745E0000000 > %294 = call float @llvm.exp2.f32(float %291) > %295 = call float @llvm.exp2.f32(float %292) > %296 = call float @llvm.exp2.f32(float %293) > %297 = call float @llvm.exp2.f32(float %285) > %298 = call float @llvm.exp2.f32(float %286) > %299 = call float @llvm.exp2.f32(float %287) > %300 = fmul float %297, %294 > %301 = fmul float %298, %295 > %302 = fmul float %299, %296 > %303 = fmul float %267, %300 > %304 = fmul float %268, %301 > %305 = fmul float %269, %302 > %306 = fsub float 1.000000e+00, %282 > %307 = fsub float 1.000000e+00, %283 > %308 = fsub float 1.000000e+00, %284 > %309 = call float @llvm.fma.f32(float %303, float %306, float 0xBF70624DE0000000) > %310 = call float @llvm.fma.f32(float %304, float %307, float 0xBF70624DE0000000) > %311 = call float @llvm.fma.f32(float %305, float %308, float 0xBF70624DE0000000) > %312 = call float @llvm.maxnum.f32(float %309, float 0.000000e+00) > %313 = call float @llvm.maxnum.f32(float %310, float 0.000000e+00) > %314 = call float @llvm.maxnum.f32(float %311, float 0.000000e+00) > %315 = call float @llvm.fma.f32(float %312, float 0x4018CCCCC0000000, float 5.000000e-01) > %316 = call float @llvm.fma.f32(float %313, float 0x4018CCCCC0000000, 
float 5.000000e-01) > %317 = call float @llvm.fma.f32(float %314, float 0x4018CCCCC0000000, float 5.000000e-01) > %318 = fmul float %312, %315 > %319 = fmul float %313, %316 > %320 = fmul float %314, %317 > %321 = call float @llvm.fma.f32(float %312, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %322 = call float @llvm.fma.f32(float %313, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %323 = call float @llvm.fma.f32(float %314, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %324 = call float @llvm.fma.f32(float %312, float %321, float 0x3FAEB851E0000000) > %325 = call float @llvm.fma.f32(float %313, float %322, float 0x3FAEB851E0000000) > %326 = call float @llvm.fma.f32(float %314, float %323, float 0x3FAEB851E0000000) > %327 = fcmp oeq float %324, 0.000000e+00 > %328 = fcmp oeq float %325, 0.000000e+00 > %329 = fcmp oeq float %326, 0.000000e+00 > %330 = fcmp ogt float %318, 0.000000e+00 > %331 = select i1 %330, float 1.000000e+00, float %318 > %332 = fcmp oge float %331, 0.000000e+00 > %333 = fcmp ogt float %319, 0.000000e+00 > %334 = select i1 %333, float 1.000000e+00, float %319 > %335 = fcmp oge float %334, 0.000000e+00 > %336 = fcmp ogt float %320, 0.000000e+00 > %337 = select i1 %336, float 1.000000e+00, float %320 > %338 = fcmp oge float %337, 0.000000e+00 > %.op75 = fmul float %331, 0x4600000000000000 > %339 = select i1 %332, float %.op75, float 0xC600000000000000 > %.op76 = fmul float %334, 0x4600000000000000 > %340 = select i1 %335, float %.op76, float 0xC600000000000000 > %.op77 = fmul float %337, 0x4600000000000000 > %341 = select i1 %338, float %.op77, float 0xC600000000000000 > %342 = fdiv float 1.000000e+00, %324 > %343 = fdiv float 1.000000e+00, %325 > %344 = fdiv float 1.000000e+00, %326 > %345 = fmul float %318, %342 > %346 = fmul float %319, %343 > %347 = fmul float %320, %344 > %348 = select i1 %327, float %339, float %345 > %349 = select i1 %328, float %340, float %346 > %350 = select i1 %329, float %341, float %347 > %351 = bitcast i32 %11 to float > %352 = insertvalue <{ float, float, float }> undef, float %351, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %96, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %138, float %139, float %121, float %127) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %140, float %141, float %142, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %143, float %144, float %145, float %71) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %348, float %349, float %350, float %282) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %100, float %101, float %102, float %103) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %109, float %115, float %121, float %127) > ret <{ float, float, float }> %352 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare 
float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL IN[5], GENERIC[5], PERSPECTIVE >DCL IN[6], GENERIC[6], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..32] >DCL TEMP[0..4], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 256, 272} >IMM[2] UINT32 {512, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[2].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[2].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[2].wwww > 4: MUL TEMP[2].xy, IN[2].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[2].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: DP3 TEMP[1].x, IN[3].xyzz, IN[3].xyzz > 26: RSQ TEMP[1].x, TEMP[1].xxxx > 27: MUL TEMP[1].xyz, TEMP[1].xxxx, IN[3].xyzz > 28: DP3 TEMP[2].x, IN[0].xyzz, IN[0].xyzz > 29: RSQ TEMP[3].x, TEMP[2].xxxx > 30: MUL TEMP[2].xyz, TEMP[3].xxxx, IN[0].xyzz > 31: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz > 32: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 33: MUL TEMP[2].x, IN[1].xxxx, CONST[1][16].wwww > 34: MUL TEMP[3].x, IN[1].yyyy, CONST[1][17].xxxx > 35: MOV TEMP[2].y, TEMP[3].xxxx > 36: FMA TEMP[3].xy, CONST[1][17].yyyy, CONST[1][16].yzzz, TEMP[2].xyyy > 37: MOV TEMP[3].xy, TEMP[3].xyyy > 38: TEX TEMP[3], TEMP[3], SAMP[1], 2D > 39: MOV TEMP[4].xy, IN[1].xyyy > 40: TEX TEMP[4], TEMP[4], SAMP[2], 2D > 41: MUL TEMP[2], TEMP[3], TEMP[4] > 42: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].wwww > 43: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx > 44: MOV TEMP[2].w, TEMP[1].xxxx > 45: MUL TEMP[0], TEMP[2], IN[6] > 46: MUL TEMP[0], TEMP[0], IN[4] > 47: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[5].wwww > 48: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 49: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][32].xyzz > 50: MOV TEMP[0].xyz, TEMP[0].xyzx > 51: MUL TEMP[1].x, 
TEMP[1].xxxx, CONST[1][16].xxxx > 52: MOV TEMP[0].w, TEMP[1].xxxx > 53: MOV OUT[0], TEMP[0] > 54: END >radeonsi: Compiling shader 356 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 512) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 516) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 520) > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 3 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 7 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0 > %56 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %57 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %56, i64 0, i64 11 > %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 > %59 = extractelement <8 x i32> %55, i32 7 > %60 = extractelement <4 x i32> %58, i32 0 > %61 = and i32 %60, %59 > %62 = insertelement <4 x i32> %58, i32 %61, i32 0 > %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> 
%8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %6, <2 x i32> %8) > %82 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %6, <2 x i32> %8) > %83 = fcmp oeq float %70, 0.000000e+00 > %84 = fcmp oeq float %70, 0.000000e+00 > %85 = fcmp ogt float %68, 0.000000e+00 > %86 = select i1 %85, float 1.000000e+00, float %68 > %87 = fcmp oge float %86, 0.000000e+00 > %88 = fcmp ogt float %69, 0.000000e+00 > %89 = select i1 %88, float 1.000000e+00, float %69 > %90 = fcmp oge float %89, 0.000000e+00 > %.op = fmul float %86, 0x4600000000000000 > %91 = select i1 %87, float %.op, float 0xC600000000000000 > %.op20 = fmul float %89, 0x4600000000000000 > %92 = select i1 %90, float %.op20, float 0xC600000000000000 > %93 = fdiv float 1.000000e+00, %70 > %94 = fmul float %68, %93 > %95 = fmul float %69, %93 > %96 = select i1 %83, float %91, float %94 > %97 = select i1 %84, float %92, float %95 > %98 = bitcast float %96 to i32 > %99 = bitcast float %97 to i32 > %100 = insertelement <2 x i32> undef, i32 %98, i32 0 > %101 = insertelement <2 x i32> %100, i32 %99, i32 1 > %102 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %101, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %103 = extractelement <4 x float> %102, i32 0 > %104 = fsub float %70, %103 > %105 = fcmp une float %25, 0.000000e+00 > %106 = call float @llvm.fabs.f32(float %104) > br i1 %105, label %IF, label %ELSE > >IF: ; preds = %main_body > %107 = fdiv float 1.000000e+00, %25 > %108 = fmul float %106, %107 > br label %ENDIF > >ELSE: ; preds = %main_body > %109 = fcmp one float %104, 0.000000e+00 > %110 = select i1 %109, float 1.000000e+00, float %106 > %111 = fcmp oge float %110, 0.000000e+00 > %.op21 = fmul float %110, 0x4600000000000000 > %112 = select i1 %111, float %.op21, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %108, %IF ], [ %112, %ELSE ] > %113 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %114 = fsub float 1.000000e+00, %113 > %115 = call float @llvm.log2.f32(float %114) > %116 = fmul float %115, %26 > %117 = call float @llvm.exp2.f32(float %116) > %118 = fsub float 1.000000e+00, %117 > %119 = fmul float %71, %71 > %120 = fmul float %72, %72 > %121 = fadd float %120, %119 > %122 = fmul float %73, %73 > %123 = fadd float %121, %122 > %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) > 
%125 = fmul float %124, %71 > %126 = fmul float %124, %72 > %127 = fmul float %124, %73 > %128 = fmul float %63, %63 > %129 = fmul float %64, %64 > %130 = fadd float %129, %128 > %131 = fmul float %65, %65 > %132 = fadd float %130, %131 > %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) > %134 = fmul float %133, %63 > %135 = fmul float %133, %64 > %136 = fmul float %133, %65 > %137 = fmul float %134, %125 > %138 = fmul float %135, %126 > %139 = fadd float %138, %137 > %140 = fmul float %136, %127 > %141 = fadd float %139, %140 > %142 = call float @llvm.AMDGPU.clamp.(float %141, float 0.000000e+00, float 1.000000e+00) > %143 = fmul float %66, %30 > %144 = fmul float %67, %31 > %145 = call float @llvm.fma.f32(float %32, float %28, float %143) > %146 = call float @llvm.fma.f32(float %32, float %29, float %144) > %147 = bitcast float %145 to i32 > %148 = bitcast float %146 to i32 > %149 = insertelement <2 x i32> undef, i32 %147, i32 0 > %150 = insertelement <2 x i32> %149, i32 %148, i32 1 > %151 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %150, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %152 = extractelement <4 x float> %151, i32 0 > %153 = extractelement <4 x float> %151, i32 1 > %154 = extractelement <4 x float> %151, i32 2 > %155 = extractelement <4 x float> %151, i32 3 > %156 = bitcast float %66 to i32 > %157 = bitcast float %67 to i32 > %158 = insertelement <2 x i32> undef, i32 %156, i32 0 > %159 = insertelement <2 x i32> %158, i32 %157, i32 1 > %160 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %159, <8 x i32> %55, <4 x i32> %62, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %161 = extractelement <4 x float> %160, i32 0 > %162 = extractelement <4 x float> %160, i32 1 > %163 = extractelement <4 x float> %160, i32 2 > %164 = extractelement <4 x float> %160, i32 3 > %165 = fmul float %152, %161 > %166 = fmul float %153, %162 > %167 = fmul float %154, %163 > %168 = fmul float %155, %164 > %169 = fmul float %142, %168 > %170 = fmul float %118, %169 > %171 = fmul float %165, %79 > %172 = fmul float %166, %80 > %173 = fmul float %167, %81 > %174 = fmul float %170, %82 > %175 = fmul float %171, %74 > %176 = fmul float %172, %75 > %177 = fmul float %173, %76 > %178 = fmul float %174, %77 > %179 = fmul float %175, %78 > %180 = fmul float %176, %78 > %181 = fmul float %177, %78 > %182 = fmul float %178, %179 > %183 = fmul float %178, %180 > %184 = fmul float %178, %181 > %185 = fmul float %182, %33 > %186 = fmul float %183, %34 > %187 = fadd float %186, %185 > %188 = fmul float %184, %35 > %189 = fadd float %187, %188 > %190 = fmul float %189, %27 > %191 = bitcast float %5 to i32 > %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %191, 10 > %193 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192, float %182, 11 > %194 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %183, 12 > %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194, float %184, 13 > %196 = insertvalue <{ i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %190, 14 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..37] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 304, 320, 336} >IMM[2] UINT32 {352, 384, 480, 592} >IMM[3] UINT32 {576, 544, 560, 368} >IMM[4] UINT32 {400, 512, 432, 416} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {496, 448, 464, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][19], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][24], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][30].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][37].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][37].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][36].xyzz > 18: MUL TEMP[8].xyz, CONST[1][34].xyzz, CONST[1][35].xyzz > 19: MOV TEMP[8].w, CONST[1][34].wwww > 20: DP3 TEMP[1].x, CONST[1][23].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][25].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][24].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, 
-TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][32].xyzz > 33: FMA TEMP[10].x, -CONST[1][27].yyyy, TEMP[9].xxxx, CONST[1][27].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][27].zzzz, TEMP[10].xxxx, -CONST[1][26].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][27].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][31].wwww > 50: FSNE TEMP[11].x, CONST[1][26].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][26].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][27].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][28].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][26].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][28].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][29].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][29].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][30].zzzz > 76: FSNE TEMP[12].x, CONST[1][26].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][26].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][29].xyzz > 87: FMA TEMP[7].xyz, CONST[1][29].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][31].xxxx >106: LG2 TEMP[7].y, CONST[1][31].yyyy >107: LG2 TEMP[7].z, CONST[1][31].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: 
EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 357 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %44 = call 
float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 
> %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br 
label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 
= select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 
0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, 
FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..33] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 256, 288} >IMM[2] UINT32 {272, 528, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[0].w, TEMP[1].xxxx > 26: ADD TEMP[1].xy, IN[0].xyyy, CONST[1][16].xyyy > 27: MUL TEMP[2].x, CONST[1][16].wwww, CONST[1][18].xxxx > 28: MUL TEMP[3].x, CONST[1][17].xxxx, CONST[1][18].xxxx > 29: MOV TEMP[2].y, TEMP[3].xxxx > 30: FMA TEMP[1].xy, TEMP[1].xyyy, CONST[1][17].yzzz, TEMP[2].xyyy > 31: MOV TEMP[2].xy, TEMP[1].xyyy > 32: TEX TEMP[2], TEMP[2], SAMP[1], 2D > 33: MOV TEMP[3].xy, IN[0].xyyy > 34: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D > 35: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx > 36: MUL TEMP[0].xyz, TEMP[2].wwww, TEMP[1].xyzz > 37: MUL TEMP[0], TEMP[0], IN[4] > 38: MUL TEMP[0], TEMP[0], IN[2] > 39: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 40: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 41: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][33].xyzz > 42: MOV TEMP[0].xyz, TEMP[0].xyzx > 43: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][16].zzzz > 44: MOV TEMP[0].w, TEMP[1].xxxx > 45: MOV OUT[0], TEMP[0] > 46: END >radeonsi: Compiling shader 358 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 528) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 532) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 536) > %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 > %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 3 > %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 > %43 = extractelement <8 x i32> %39, i32 7 > %44 = extractelement <4 x i32> %42, i32 0 > %45 = and i32 %44, %43 > %46 = insertelement <4 x i32> %42, i32 %45, i32 0 > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 7 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 11 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %79 = fcmp oeq float %69, 0.000000e+00 > %80 = fcmp oeq float %69, 0.000000e+00 > %81 = fcmp ogt float %67, 0.000000e+00 > %82 = select i1 %81, float 1.000000e+00, float %67 > %83 = fcmp oge float %82, 0.000000e+00 > %84 = fcmp ogt float %68, 0.000000e+00 > %85 = select i1 %84, float 1.000000e+00, float %68 > %86 = fcmp oge float %85, 0.000000e+00 > %.op = fmul float %82, 0x4600000000000000 > %87 = select i1 %83, float %.op, float 0xC600000000000000 > %.op16 = fmul float %85, 0x4600000000000000 > %88 = select i1 %86, float %.op16, float 0xC600000000000000 > %89 = fdiv 
float 1.000000e+00, %69 > %90 = fmul float %67, %89 > %91 = fmul float %68, %89 > %92 = select i1 %79, float %87, float %90 > %93 = select i1 %80, float %88, float %91 > %94 = bitcast float %92 to i32 > %95 = bitcast float %93 to i32 > %96 = insertelement <2 x i32> undef, i32 %94, i32 0 > %97 = insertelement <2 x i32> %96, i32 %95, i32 1 > %98 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %97, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %99 = extractelement <4 x float> %98, i32 0 > %100 = fsub float %69, %99 > %101 = fcmp une float %25, 0.000000e+00 > %102 = call float @llvm.fabs.f32(float %100) > br i1 %101, label %IF, label %ELSE > >IF: ; preds = %main_body > %103 = fdiv float 1.000000e+00, %25 > %104 = fmul float %102, %103 > br label %ENDIF > >ELSE: ; preds = %main_body > %105 = fcmp one float %100, 0.000000e+00 > %106 = select i1 %105, float 1.000000e+00, float %102 > %107 = fcmp oge float %106, 0.000000e+00 > %.op17 = fmul float %106, 0x4600000000000000 > %108 = select i1 %107, float %.op17, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %104, %IF ], [ %108, %ELSE ] > %109 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %110 = fsub float 1.000000e+00, %109 > %111 = call float @llvm.log2.f32(float %110) > %112 = fmul float %111, %26 > %113 = call float @llvm.exp2.f32(float %112) > %114 = fsub float 1.000000e+00, %113 > %115 = fadd float %65, %27 > %116 = fadd float %66, %28 > %117 = fmul float %30, %34 > %118 = fmul float %31, %34 > %119 = call float @llvm.fma.f32(float %115, float %32, float %117) > %120 = call float @llvm.fma.f32(float %116, float %33, float %118) > %121 = bitcast float %119 to i32 > %122 = bitcast float %120 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 0 > %127 = extractelement <4 x float> %125, i32 1 > %128 = extractelement <4 x float> %125, i32 2 > %129 = extractelement <4 x float> %125, i32 3 > %130 = bitcast float %65 to i32 > %131 = bitcast float %66 to i32 > %132 = insertelement <2 x i32> undef, i32 %130, i32 0 > %133 = insertelement <2 x i32> %132, i32 %131, i32 1 > %134 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %133, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %135 = extractelement <4 x float> %134, i32 0 > %136 = fmul float %126, %135 > %137 = fmul float %127, %135 > %138 = fmul float %128, %135 > %139 = fmul float %129, %136 > %140 = fmul float %129, %137 > %141 = fmul float %129, %138 > %142 = fmul float %139, %75 > %143 = fmul float %140, %76 > %144 = fmul float %141, %77 > %145 = fmul float %114, %78 > %146 = fmul float %142, %70 > %147 = fmul float %143, %71 > %148 = fmul float %144, %72 > %149 = fmul float %145, %73 > %150 = fmul float %146, %74 > %151 = fmul float %147, %74 > %152 = fmul float %148, %74 > %153 = fmul float %149, %150 > %154 = fmul float %149, %151 > %155 = fmul float %149, %152 > %156 = fmul float %153, %35 > %157 = fmul float %154, %36 > %158 = fadd float %157, %156 > %159 = fmul float %155, %37 > %160 = fadd float %158, %159 > %161 = fmul float %160, %29 > %162 = bitcast float %5 to i32 > %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %162, 10 > %164 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %163, float %153, 11 > %165 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %164, float %154, 12 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %165, float %155, 13 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %161, 14 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..35] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 288, 304, 320} >IMM[2] UINT32 {336, 368, 464, 560} >IMM[3] UINT32 {544, 528, 512, 256} >IMM[4] UINT32 {352, 384, 496, 416} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {400, 480, 432, 448} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][18], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][23], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][29].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][35].xyyy, 
IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][35].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][34].xyzz > 18: FMA TEMP[8].xyz, CONST[1][33].xyzz, CONST[1][32].xyzz, CONST[1][16].xyzz > 19: MOV_SAT TEMP[8].xyz, TEMP[8].xyzz > 20: MOV TEMP[8].w, CONST[1][32].wwww > 21: DP3 TEMP[1].x, CONST[1][22].xyzz, TEMP[7].xyzz > 22: DP3 TEMP[9].x, CONST[1][24].xyzz, TEMP[7].xyzz > 23: MOV TEMP[1].z, TEMP[9].xxxx > 24: DP3 TEMP[7].x, CONST[1][23].xyzz, TEMP[7].xyzz > 25: MOV TEMP[1].y, TEMP[7].xxxx > 26: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 27: RSQ TEMP[9].x, TEMP[9].xxxx > 28: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 29: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 30: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 31: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 33: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][31].xyzz > 34: FMA TEMP[10].x, -CONST[1][26].yyyy, TEMP[9].xxxx, CONST[1][26].xxxx > 35: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 36: MOV TEMP[0].z, TEMP[9].xxxx > 37: ABS TEMP[9].x, TEMP[10].xxxx > 38: LG2 TEMP[9].x, TEMP[9].xxxx > 39: MOV TEMP[0].w, TEMP[9].xxxx > 40: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 41: EX2 TEMP[10].x, TEMP[9].yyyy > 42: FMA TEMP[1].x, CONST[1][26].zzzz, TEMP[10].xxxx, -CONST[1][25].zzzz > 43: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][26].zzzz > 44: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 45: ABS TEMP[12].x, TEMP[2].xxxx > 46: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 47: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 48: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 49: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 50: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][30].wwww > 51: FSNE TEMP[11].x, CONST[1][25].xxxx, IMM[0].wwww > 52: UIF TEMP[11].xxxx :0 > 53: RCP TEMP[11].x, CONST[1][25].xxxx > 54: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 55: ELSE :0 > 56: SSG TEMP[12].x, -TEMP[0].xxxx > 57: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 58: ENDIF > 59: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 60: EX2 TEMP[11].x, TEMP[1].xxxx > 61: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][26].wwww > 62: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][27].yyyy > 63: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 64: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 65: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][25].wwww > 66: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][27].xxxx > 67: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 68: FSNE TEMP[11].x, CONST[1][28].wwww, IMM[0].wwww > 69: UIF TEMP[11].xxxx :0 > 70: RCP TEMP[11].x, CONST[1][28].wwww > 71: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 72: ELSE :0 > 73: SSG TEMP[12].x, -TEMP[0].xxxx > 74: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 75: ENDIF > 76: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][29].zzzz > 77: FSNE TEMP[12].x, CONST[1][25].yyyy, IMM[0].wwww > 78: UIF TEMP[12].xxxx :0 > 79: RCP TEMP[12].x, CONST[1][25].yyyy > 80: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 81: ELSE :0 > 82: SSG TEMP[13].x, TEMP[0].xxxx > 83: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 84: ENDIF > 85: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 86: EX2 TEMP[11].x, TEMP[1].xxxx > 87: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][28].xyzz > 88: FMA TEMP[7].xyz, CONST[1][28].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 89: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 90: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 91: SSG TEMP[11].xyz, TEMP[9].xyzz > 92: MUL TEMP[11].xyz, 
IMM[5].wwww, TEMP[11].xyzz > 93: RCP TEMP[13].x, TEMP[7].xxxx > 94: RCP TEMP[13].y, TEMP[7].yyyy > 95: RCP TEMP[13].z, TEMP[7].zzzz > 96: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 97: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 98: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 99: ABS TEMP[2].xyz, TEMP[2].xxxx >100: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >101: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >102: EX2 TEMP[2].x, TEMP[1].xxxx >103: EX2 TEMP[2].y, TEMP[1].yyyy >104: EX2 TEMP[2].z, TEMP[1].zzzz >105: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >106: LG2 TEMP[7].x, CONST[1][30].xxxx >107: LG2 TEMP[7].y, CONST[1][30].yyyy >108: LG2 TEMP[7].z, CONST[1][30].zzzz >109: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >110: EX2 TEMP[10].x, TEMP[7].xxxx >111: EX2 TEMP[10].y, TEMP[7].yyyy >112: EX2 TEMP[10].z, TEMP[7].zzzz >113: EX2 TEMP[7].x, TEMP[6].xxxx >114: EX2 TEMP[7].y, TEMP[6].yyyy >115: EX2 TEMP[7].z, TEMP[6].zzzz >116: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >117: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >118: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >119: MOV TEMP[2].w, TEMP[2].xxxx >120: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >121: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >122: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >123: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >124: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >125: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >126: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >127: SSG TEMP[7].xyz, TEMP[1].xyzz >128: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >129: RCP TEMP[9].x, TEMP[0].xxxx >130: RCP TEMP[9].y, TEMP[0].yyyy >131: RCP TEMP[9].z, TEMP[0].zzzz >132: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >133: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >134: MOV OUT[5], IN[2] >135: MOV OUT[4], TEMP[2] >136: MOV OUT[3], TEMP[8] >137: MOV OUT[2], TEMP[5] >138: MOV OUT[1], TEMP[4] >139: MOV OUT[0], TEMP[3] >140: END >radeonsi: Compiling shader 359 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %32 = 
call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %79 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %80 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %81 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 > %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %13) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = getelementptr [16 x <16 x 
i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %14) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 > %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %94, i32 0, i32 %15) > %96 = extractelement <4 x float> %95, i32 0 > %97 = extractelement <4 x float> %95, i32 1 > %98 = extractelement <4 x float> %95, i32 2 > %99 = extractelement <4 x float> %95, i32 3 > %100 = fmul float %21, %85 > %101 = fmul float %22, %86 > %102 = fadd float %100, %101 > %103 = fmul float %23, %87 > %104 = fadd float %102, %103 > %105 = fadd float %104, %24 > %106 = fmul float %25, %85 > %107 = fmul float %26, %86 > %108 = fadd float %106, %107 > %109 = fmul float %27, %87 > %110 = fadd float %108, %109 > %111 = fadd float %110, %28 > %112 = fmul float %29, %85 > %113 = fmul float %30, %86 > %114 = fadd float %112, %113 > %115 = fmul float %31, %87 > %116 = fadd float %114, %115 > %117 = fadd float %116, %32 > %118 = fmul float %33, %85 > %119 = fmul float %34, %86 > %120 = fadd float %118, %119 > %121 = fmul float %35, %87 > %122 = fadd float %120, %121 > %123 = fadd float %122, %36 > %124 = fmul float %40, %85 > %125 = fmul float %41, %86 > %126 = fadd float %124, %125 > %127 = fmul float %42, %87 > %128 = fadd float %126, %127 > %129 = fadd float %128, %43 > %130 = fadd float %129, %62 > %131 = fsub float -0.000000e+00, %81 > %132 = fmul float %123, %80 > %133 = fmul float %123, %81 > %134 = call float @llvm.fma.f32(float %105, float %80, float %132) > %135 = call float @llvm.fma.f32(float %111, float %131, float %133) > %136 = fsub float %77, %85 > %137 = fsub float %78, %86 > %138 = fsub float %79, %87 > %139 = call float @llvm.fma.f32(float %74, float %70, float %18) > %140 = call float @llvm.fma.f32(float %75, float %71, float %19) > %141 = call float @llvm.fma.f32(float %76, float %72, float %20) > %142 = call float @llvm.AMDGPU.clamp.(float %139, float 0.000000e+00, float 1.000000e+00) > %143 = call float @llvm.AMDGPU.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) > %144 = call float @llvm.AMDGPU.clamp.(float %141, float 0.000000e+00, float 1.000000e+00) > %145 = fmul float %37, %136 > %146 = fmul float %38, %137 > %147 = fadd float %146, %145 > %148 = fmul float %39, %138 > %149 = fadd float %147, %148 > %150 = fmul float %44, %136 > %151 = fmul float %45, %137 > %152 = fadd float %151, %150 > %153 = fmul float %46, %138 > %154 = fadd float %152, %153 > %155 = fmul float %40, %136 > %156 = fmul float %41, %137 > %157 = fadd float %156, %155 > %158 = fmul float %42, %138 > %159 = fadd float %157, %158 > %160 = fmul float %149, %149 > %161 = fmul float %159, %159 > %162 = fadd float %161, %160 > %163 = fmul float %154, %154 > %164 = fadd float %162, %163 > %165 = call float @llvm.AMDGPU.rsq.clamped.f32(float %164) > %166 = fmul float %165, %149 > %167 = fmul float %165, %159 > %168 = fmul float %165, %154 > %169 = fsub float -0.000000e+00, %159 > %170 = call float @llvm.fma.f32(float %169, float %165, float 0xBFC3333340000000) > %171 = fsub float 1.000000e+00, %170 > %172 = call float @llvm.AMDGPU.clamp.(float %171, float 0.000000e+00, float 1.000000e+00) > %173 = fmul float %172, %172 > %174 = fmul float %166, %67 > %175 = fsub float 
-0.000000e+00, %174 > %176 = fmul float %167, %68 > %177 = fsub float %175, %176 > %178 = fmul float %168, %69 > %179 = fsub float %177, %178 > %180 = fsub float -0.000000e+00, %52 > %181 = call float @llvm.fma.f32(float %180, float %179, float %51) > %182 = call float @llvm.fma.f32(float %179, float %179, float 1.000000e+00) > %183 = call float @llvm.fabs.f32(float %181) > %184 = call float @llvm.log2.f32(float %183) > %185 = fmul float %182, 0x3FAE8EC8A0000000 > %186 = fmul float %184, -1.500000e+00 > %187 = call float @llvm.exp2.f32(float %186) > %188 = fsub float -0.000000e+00, %49 > %189 = call float @llvm.fma.f32(float %53, float %187, float %188) > %190 = fmul float %187, %53 > %191 = call float @llvm.maxnum.f32(float %189, float 0.000000e+00) > %192 = call float @llvm.fabs.f32(float %123) > %193 = fmul float %192, 0x3EF4F8B580000000 > %194 = call float @llvm.minnum.f32(float %193, float 1.000000e+00) > %195 = fsub float 1.000000e+00, %194 > %196 = fsub float -0.000000e+00, %191 > %197 = call float @llvm.fma.f32(float %196, float %195, float %190) > %198 = call float @llvm.maxnum.f32(float %197, float %66) > %199 = fcmp une float %47, 0.000000e+00 > br i1 %199, label %IF, label %ELSE > >IF: ; preds = %main_body > %200 = fdiv float 1.000000e+00, %47 > %201 = fmul float %130, %200 > %202 = fsub float -0.000000e+00, %201 > br label %ENDIF > >ELSE: ; preds = %main_body > %203 = fsub float -0.000000e+00, %130 > %204 = fcmp olt float %130, -0.000000e+00 > %205 = select i1 %204, float 1.000000e+00, float %203 > %206 = fcmp oge float %205, 0.000000e+00 > %.op = fmul float %205, 0x4600000000000000 > %207 = select i1 %206, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %202, %IF ], [ %207, %ELSE ] > %208 = fmul float %temp44.0, 0x3FF7154760000000 > %209 = call float @llvm.exp2.f32(float %208) > %210 = fadd float %209, %54 > %211 = fmul float %210, %56 > %212 = fmul float %211, 5.000000e-01 > %213 = fmul float %173, %212 > %214 = call float @llvm.minnum.f32(float %213, float %50) > %215 = call float @llvm.maxnum.f32(float %214, float %55) > %216 = fmul float %215, %198 > %217 = fcmp une float %60, 0.000000e+00 > br i1 %217, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %218 = fdiv float 1.000000e+00, %60 > %219 = fmul float %130, %218 > %220 = fsub float -0.000000e+00, %219 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %221 = fsub float -0.000000e+00, %130 > %222 = fcmp olt float %130, -0.000000e+00 > %223 = select i1 %222, float 1.000000e+00, float %221 > %224 = fcmp oge float %223, 0.000000e+00 > %.op62 = fmul float %223, 0x4600000000000000 > %225 = select i1 %224, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %220, %IF57 ], [ %225, %ELSE58 ] > %226 = fsub float %61, %130 > %227 = fcmp une float %48, 0.000000e+00 > br i1 %227, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %228 = fdiv float 1.000000e+00, %48 > %229 = fmul float %226, %228 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %230 = fcmp ogt float %226, 0.000000e+00 > %231 = select i1 %230, float 1.000000e+00, float %226 > %232 = fcmp oge float %231, 0.000000e+00 > %.op63 = fmul float %231, 0x4600000000000000 > %233 = select i1 %232, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %229, %IF60 ], [ %233, %ELSE61 ] > %234 = fmul float %temp44.1, 0x3FF7154760000000 > %235 = call float 
@llvm.exp2.f32(float %234) > %236 = fmul float %235, %57 > %237 = fmul float %235, %58 > %238 = fmul float %235, %59 > %239 = call float @llvm.fma.f32(float %57, float %235, float %215) > %240 = call float @llvm.fma.f32(float %58, float %235, float %215) > %241 = call float @llvm.fma.f32(float %59, float %235, float %215) > %242 = call float @llvm.fma.f32(float %236, float %185, float %216) > %243 = call float @llvm.fma.f32(float %237, float %185, float %216) > %244 = call float @llvm.fma.f32(float %238, float %185, float %216) > %245 = fcmp oeq float %239, 0.000000e+00 > %246 = fcmp oeq float %240, 0.000000e+00 > %247 = fcmp oeq float %241, 0.000000e+00 > %248 = fcmp ogt float %242, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %242 > %250 = fcmp oge float %249, 0.000000e+00 > %251 = fcmp ogt float %243, 0.000000e+00 > %252 = select i1 %251, float 1.000000e+00, float %243 > %253 = fcmp oge float %252, 0.000000e+00 > %254 = fcmp ogt float %244, 0.000000e+00 > %255 = select i1 %254, float 1.000000e+00, float %244 > %256 = fcmp oge float %255, 0.000000e+00 > %.op64 = fmul float %249, 0x4600000000000000 > %257 = select i1 %250, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %252, 0x4600000000000000 > %258 = select i1 %253, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %255, 0x4600000000000000 > %259 = select i1 %256, float %.op66, float 0xC600000000000000 > %260 = fdiv float 1.000000e+00, %239 > %261 = fdiv float 1.000000e+00, %240 > %262 = fdiv float 1.000000e+00, %241 > %263 = fmul float %242, %260 > %264 = fmul float %243, %261 > %265 = fmul float %244, %262 > %266 = select i1 %245, float %257, float %263 > %267 = select i1 %246, float %258, float %264 > %268 = select i1 %247, float %259, float %265 > %269 = fmul float %239, %temp48.0 > %270 = fmul float %240, %temp48.0 > %271 = fmul float %241, %temp48.0 > %272 = call float @llvm.fabs.f32(float %123) > %273 = call float @llvm.fabs.f32(float %123) > %274 = call float @llvm.fabs.f32(float %123) > %275 = fmul float %239, %272 > %276 = fmul float %240, %273 > %277 = fmul float %241, %274 > %278 = fmul float %275, 0xBFF7154760000000 > %279 = fmul float %276, 0xBFF7154760000000 > %280 = fmul float %277, 0xBFF7154760000000 > %281 = call float @llvm.exp2.f32(float %278) > %282 = call float @llvm.exp2.f32(float %279) > %283 = call float @llvm.exp2.f32(float %280) > %284 = fmul float %269, 0xBFF7154760000000 > %285 = fmul float %270, 0xBFF7154760000000 > %286 = fmul float %271, 0xBFF7154760000000 > %287 = call float @llvm.log2.f32(float %63) > %288 = call float @llvm.log2.f32(float %64) > %289 = call float @llvm.log2.f32(float %65) > %290 = fmul float %287, 0x3FDD1745E0000000 > %291 = fmul float %288, 0x3FDD1745E0000000 > %292 = fmul float %289, 0x3FDD1745E0000000 > %293 = call float @llvm.exp2.f32(float %290) > %294 = call float @llvm.exp2.f32(float %291) > %295 = call float @llvm.exp2.f32(float %292) > %296 = call float @llvm.exp2.f32(float %284) > %297 = call float @llvm.exp2.f32(float %285) > %298 = call float @llvm.exp2.f32(float %286) > %299 = fmul float %296, %293 > %300 = fmul float %297, %294 > %301 = fmul float %298, %295 > %302 = fmul float %266, %299 > %303 = fmul float %267, %300 > %304 = fmul float %268, %301 > %305 = fsub float 1.000000e+00, %281 > %306 = fsub float 1.000000e+00, %282 > %307 = fsub float 1.000000e+00, %283 > %308 = call float @llvm.fma.f32(float %302, float %305, float 0xBF70624DE0000000) > %309 = call float @llvm.fma.f32(float %303, float %306, float 0xBF70624DE0000000) 
> %310 = call float @llvm.fma.f32(float %304, float %307, float 0xBF70624DE0000000) > %311 = call float @llvm.maxnum.f32(float %308, float 0.000000e+00) > %312 = call float @llvm.maxnum.f32(float %309, float 0.000000e+00) > %313 = call float @llvm.maxnum.f32(float %310, float 0.000000e+00) > %314 = call float @llvm.fma.f32(float %311, float 0x4018CCCCC0000000, float 5.000000e-01) > %315 = call float @llvm.fma.f32(float %312, float 0x4018CCCCC0000000, float 5.000000e-01) > %316 = call float @llvm.fma.f32(float %313, float 0x4018CCCCC0000000, float 5.000000e-01) > %317 = fmul float %311, %314 > %318 = fmul float %312, %315 > %319 = fmul float %313, %316 > %320 = call float @llvm.fma.f32(float %311, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %321 = call float @llvm.fma.f32(float %312, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %322 = call float @llvm.fma.f32(float %313, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %323 = call float @llvm.fma.f32(float %311, float %320, float 0x3FAEB851E0000000) > %324 = call float @llvm.fma.f32(float %312, float %321, float 0x3FAEB851E0000000) > %325 = call float @llvm.fma.f32(float %313, float %322, float 0x3FAEB851E0000000) > %326 = fcmp oeq float %323, 0.000000e+00 > %327 = fcmp oeq float %324, 0.000000e+00 > %328 = fcmp oeq float %325, 0.000000e+00 > %329 = fcmp ogt float %317, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %317 > %331 = fcmp oge float %330, 0.000000e+00 > %332 = fcmp ogt float %318, 0.000000e+00 > %333 = select i1 %332, float 1.000000e+00, float %318 > %334 = fcmp oge float %333, 0.000000e+00 > %335 = fcmp ogt float %319, 0.000000e+00 > %336 = select i1 %335, float 1.000000e+00, float %319 > %337 = fcmp oge float %336, 0.000000e+00 > %.op67 = fmul float %330, 0x4600000000000000 > %338 = select i1 %331, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %333, 0x4600000000000000 > %339 = select i1 %334, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %336, 0x4600000000000000 > %340 = select i1 %337, float %.op69, float 0xC600000000000000 > %341 = fdiv float 1.000000e+00, %323 > %342 = fdiv float 1.000000e+00, %324 > %343 = fdiv float 1.000000e+00, %325 > %344 = fmul float %317, %341 > %345 = fmul float %318, %342 > %346 = fmul float %319, %343 > %347 = select i1 %326, float %338, float %344 > %348 = select i1 %327, float %339, float %345 > %349 = select i1 %328, float %340, float %346 > %350 = bitcast i32 %11 to float > %351 = insertvalue <{ float, float, float }> undef, float %350, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %91, float %92, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %134, float %135, float %117, float %123) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %142, float %143, float %144, float %73) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %347, float %348, float %349, float %281) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %96, float %97, float %98, float %99) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %105, float %111, float %117, float %123) > ret <{ float, float, float }> %351 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 
> >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], BUFFER, FLOAT >DCL CONST[1][0..17] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 256, 272, 0} >IMM[2] INT32 {0, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][16].wwww, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][16].wwww > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][17].xxxx > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xy, IN[0].xyyy > 26: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 27: MOV TEMP[2].xyz, TEMP[1].xyzx > 28: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 29: MOV TEMP[2].w, TEMP[1].xxxx > 30: MUL TEMP[0], TEMP[2], IN[4] > 31: MUL TEMP[0], TEMP[0], IN[2] > 32: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[3].wwww, IN[3].xyzz > 33: MUL TEMP[0].x, TEMP[0].wwww, IN[3].wwww > 34: MOV TEMP[0].w, TEMP[0].xxxx > 35: MOV TEMP[2].x, IMM[2].xxxx > 36: MOV TEMP[2].w, IMM[1].xxxx > 37: TXF TEMP[2].x, TEMP[2], SAMP[2], BUFFER > 38: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 39: MOV OUT[0], TEMP[0] > 40: END >radeonsi: Compiling shader 360 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x 
<8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = bitcast <8 x i32> addrspace(2)* %45 to <2 x i128> addrspace(2)* > %47 = load <2 x i128>, <2 x i128> addrspace(2)* %46, align 32, !tbaa !0 > %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %65 = fcmp oeq float %52, 0.000000e+00 > %66 = fcmp oeq float %52, 0.000000e+00 > %67 = fcmp ogt float %50, 0.000000e+00 > %68 = select i1 %67, float 1.000000e+00, float %50 > %69 = fcmp oge float %68, 0.000000e+00 > %70 = fcmp ogt float %51, 0.000000e+00 > %71 = select i1 %70, float 1.000000e+00, float %51 > %72 = fcmp oge float %71, 0.000000e+00 > %.op 
= fmul float %68, 0x4600000000000000 > %73 = select i1 %69, float %.op, float 0xC600000000000000 > %.op12 = fmul float %71, 0x4600000000000000 > %74 = select i1 %72, float %.op12, float 0xC600000000000000 > %75 = fdiv float 1.000000e+00, %52 > %76 = fmul float %50, %75 > %77 = fmul float %51, %75 > %78 = select i1 %65, float %73, float %76 > %79 = select i1 %66, float %74, float %77 > %80 = bitcast float %78 to i32 > %81 = bitcast float %79 to i32 > %82 = insertelement <2 x i32> undef, i32 %80, i32 0 > %83 = insertelement <2 x i32> %82, i32 %81, i32 1 > %84 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %83, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %85 = extractelement <4 x float> %84, i32 0 > %86 = fsub float %52, %85 > %87 = fcmp une float %25, 0.000000e+00 > %88 = call float @llvm.fabs.f32(float %86) > br i1 %87, label %IF, label %ELSE > >IF: ; preds = %main_body > %89 = fdiv float 1.000000e+00, %25 > %90 = fmul float %88, %89 > br label %ENDIF > >ELSE: ; preds = %main_body > %91 = fcmp one float %86, 0.000000e+00 > %92 = select i1 %91, float 1.000000e+00, float %88 > %93 = fcmp oge float %92, 0.000000e+00 > %.op13 = fmul float %92, 0x4600000000000000 > %94 = select i1 %93, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %90, %IF ], [ %94, %ELSE ] > %95 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %96 = fsub float 1.000000e+00, %95 > %97 = call float @llvm.log2.f32(float %96) > %98 = fmul float %97, %26 > %99 = call float @llvm.exp2.f32(float %98) > %100 = fsub float 1.000000e+00, %99 > %101 = bitcast float %48 to i32 > %102 = bitcast float %49 to i32 > %103 = insertelement <2 x i32> undef, i32 %101, i32 0 > %104 = insertelement <2 x i32> %103, i32 %102, i32 1 > %105 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %104, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %106 = extractelement <4 x float> %105, i32 0 > %107 = extractelement <4 x float> %105, i32 1 > %108 = extractelement <4 x float> %105, i32 2 > %109 = extractelement <4 x float> %105, i32 3 > %110 = fmul float %100, %109 > %111 = fmul float %106, %61 > %112 = fmul float %107, %62 > %113 = fmul float %108, %63 > %114 = fmul float %110, %64 > %115 = fmul float %111, %53 > %116 = fmul float %112, %54 > %117 = fmul float %113, %55 > %118 = fmul float %114, %56 > %119 = call float @llvm.fma.f32(float %115, float %60, float %57) > %120 = call float @llvm.fma.f32(float %116, float %60, float %58) > %121 = call float @llvm.fma.f32(float %117, float %60, float %59) > %122 = fmul float %118, %60 > %123 = extractelement <2 x i128> %47, i32 1 > %124 = bitcast i128 %123 to <16 x i8> > %125 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %124, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 0 > %127 = fmul float %126, %119 > %128 = fmul float %126, %120 > %129 = fmul float %126, %121 > %130 = bitcast float %5 to i32 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %130, 10 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %127, 11 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> %132, float %128, 12 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %129, 13 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %122, 14 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL CONST[1][0..27] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 320, 336, 352} >IMM[2] UINT32 {368, 432, 384, 400} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][23], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV TEMP[0].xy, IN[1].xyxx > 10: MUL TEMP[2].xyz, CONST[1][24].xyzz, CONST[1][25].xyzz > 11: MOV TEMP[2].w, CONST[1][24].wwww > 12: MOV OUT[3], IN[2] > 13: MOV OUT[2], TEMP[2] > 14: MOV OUT[1], TEMP[0] > 15: MOV OUT[0], TEMP[1] > 16: END >radeonsi: Compiling shader 361 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> 
addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %13) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %14) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %15) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = extractelement <4 x float> %54, i32 3 > %59 = fmul float %18, %44 > %60 = fmul float %19, %45 > %61 = fadd float %59, %60 > %62 = fmul float %20, %46 > %63 = fadd float %61, %62 > %64 = fadd float %63, %21 > %65 = fmul float %22, %44 > %66 = fmul float %23, %45 > %67 = fadd float %65, %66 > %68 = fmul float %24, %46 > %69 = fadd float %67, %68 > %70 = fadd float %69, %25 > %71 = fmul float %26, %44 > %72 = fmul float %27, %45 > %73 = fadd float %71, %72 > %74 = fmul float %28, %46 > %75 = fadd float %73, %74 > %76 = fadd float %75, %29 > %77 = fmul float %30, %44 > %78 = fmul float %31, %45 > %79 = fadd float %77, %78 > %80 = fmul float %32, %46 > %81 = fadd float %79, %80 > %82 = fadd float %81, %33 > %83 = fmul float %34, %38 > %84 = fmul float %35, %39 > %85 = fmul float %36, %40 > %86 = bitcast i32 %11 to float > %87 = insertvalue <{ float, float, float }> undef, float %86, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %50, float 
%51, float %46, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %83, float %84, float %85, float %37) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %55, float %56, float %57, float %58) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %70, float %76, float %82) > ret <{ float, float, float }> %87 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL CONST[1][0..19] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 288, 240, 304} >IMM[1] UINT32 {256, 272, 0, 0} >IMM[2] FLT32 { 2.0000, -1.0000, 0.0000, 0.0000} > 0: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][18].xyyy, CONST[1][15].zwww > 1: FMA TEMP[0].xy, CONST[1][19].wwww, CONST[1][16].xyyy, TEMP[0].xyyy > 2: MOV TEMP[1].xy, TEMP[0].xyyy > 3: TEX TEMP[1].yw, TEMP[1], SAMP[0], 2D > 4: FMA TEMP[1].xy, TEMP[1].ywww, IMM[2].xxxx, IMM[2].yyyy > 5: MOV TEMP[0].x, TEMP[1].xyxx > 6: MOV TEMP[0].z, -TEMP[1].yyyy > 7: MUL TEMP[0].xy, TEMP[0].xzzz, CONST[1][19].xyyy > 8: FMA TEMP[0].xy, IN[0].xyyy, CONST[1][17].xyyy, TEMP[0].xyyy > 9: MOV TEMP[1].xy, TEMP[0].xyyy > 10: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 11: MUL TEMP[0], TEMP[1], IN[2] > 12: MUL TEMP[0], TEMP[0], IN[1] > 13: MOV OUT[0], TEMP[0] > 14: END >radeonsi: Compiling shader 362 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %35 = call 
float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 3 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 > %47 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %48 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %47, i64 0, i64 7 > %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 > %50 = extractelement <8 x i32> %46, i32 7 > %51 = extractelement <4 x i32> %49, i32 0 > %52 = and i32 %51, %50 > %53 = insertelement <4 x i32> %49, i32 %52, i32 0 > %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %64 = call float @llvm.fma.f32(float %54, float %31, float %25) > %65 = call float @llvm.fma.f32(float %55, float %32, float %26) > %66 = call float @llvm.fma.f32(float %35, float %27, float %64) > %67 = call float @llvm.fma.f32(float %35, float %28, float %65) > %68 = bitcast float %66 to i32 > %69 = bitcast float %67 to i32 > %70 = insertelement <2 x i32> undef, i32 %68, i32 0 > %71 = insertelement <2 x i32> %70, i32 %69, i32 1 > %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %73 = extractelement <4 x float> %72, i32 1 > %74 = extractelement <4 x float> %72, i32 3 > %75 = call float @llvm.fma.f32(float %73, float 2.000000e+00, float -1.000000e+00) > %76 = call float @llvm.fma.f32(float %74, float 2.000000e+00, float -1.000000e+00) > %77 = fmul float %75, %33 > %78 = fmul float %76, %34 > %79 = fsub float -0.000000e+00, %78 > %80 = call float @llvm.fma.f32(float %54, float %29, float %77) > %81 = call float @llvm.fma.f32(float %55, float %30, float %79) > %82 = bitcast float %80 to i32 > %83 = bitcast float %81 to i32 > %84 = insertelement <2 x i32> undef, i32 %82, i32 0 > %85 = insertelement <2 x i32> %84, i32 %83, i32 1 > %86 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %85, <8 x i32> %46, <4 x i32> %53, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %87 = extractelement <4 x float> %86, i32 0 > %88 = extractelement <4 x float> %86, i32 1 > %89 = extractelement <4 x float> %86, i32 2 > %90 = extractelement <4 x float> %86, i32 3 > %91 = fmul float %87, %60 > %92 = fmul 
float %88, %61 > %93 = fmul float %89, %62 > %94 = fmul float %90, %63 > %95 = fmul float %91, %56 > %96 = fmul float %92, %57 > %97 = fmul float %93, %58 > %98 = fmul float %94, %59 > %99 = bitcast float %5 to i32 > %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %99, 10 > %101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float %95, 11 > %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %96, 12 > %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %97, 13 > %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %98, 14 > %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..41] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 368} >IMM[3] UINT32 {384, 400, 416, 640} >IMM[4] UINT32 {448, 544, 656, 608} >IMM[5] UINT32 {624, 432, 464, 576} >IMM[6] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[7] UINT32 {496, 480, 560, 512} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] UINT32 {528, 0, 0, 0} >IMM[10] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[11] FLT32 { 0.0600, 0.0000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL 
TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR 
TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, 
IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, 
TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[2].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[6].xxxx >305: MOV TEMP[2].x, CONST[2][ADDR[0].x] >306: MUL TEMP[2].x, IN[3].zzzz, TEMP[2].xxxx >307: MOV TEMP[16].w, TEMP[2].xxxx >308: UMUL TEMP[2].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[6].xxxx >311: MOV TEMP[2].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[7].xxxx >315: MOV TEMP[6].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[8].xxxx >317: ADD TEMP[2].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[2].xxxx >319: MUL TEMP[2].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[2].xyyy >321: MOV TEMP[9].yz, TEMP[2].yxyy >322: UMUL TEMP[2].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[6].xxxx >325: MOV TEMP[2].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[7].x, 
TEMP[6].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[7].xxxx >329: MOV TEMP[6].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[6].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[2].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[6].x, TEMP[2].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[6].xxxx >334: MOV TEMP[2].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[6].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[6].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[2].x, TEMP[2].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[2].xxxx >341: ADD TEMP[2].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[2].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[2].yxyy >346: ADD TEMP[2].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[2].xxxx >349: MUL TEMP[2].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[2].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][23], TEMP[4] >356: DP4 TEMP[2].x, CONST[1][24], TEMP[4] >357: MOV TEMP[0].y, TEMP[2].xxxx >358: DP4 TEMP[2].x, CONST[1][25], TEMP[4] >359: MOV TEMP[0].z, TEMP[2].xxxx >360: DP4 TEMP[2].x, CONST[1][26], TEMP[4] >361: MOV TEMP[0].w, TEMP[2].xxxx >362: ADD TEMP[3].xyz, -TEMP[4].xyzz, CONST[1][40].xyzz >363: DP4 TEMP[5].x, CONST[1][28], TEMP[4] >364: ADD TEMP[1].x, TEMP[5].xxxx, CONST[1][34].wwww >365: MOV TEMP[5], TEMP[0] >366: MOV TEMP[6].xy, IN[1].xyxx >367: MUL TEMP[7].xy, TEMP[2].xxxx, CONST[1][41].xyyy >368: MUL TEMP[8].xy, CONST[1][41].xyyy, IMM[0].zwww >369: FMA TEMP[8].xy, TEMP[0].xyyy, TEMP[8].xyyy, TEMP[7].xyyy >370: MOV TEMP[8].zw, TEMP[0].wwzw >371: MUL TEMP[9].xyz, CONST[1][38].xyzz, CONST[1][39].xyzz >372: MOV TEMP[9].w, CONST[1][38].wwww >373: ABS TEMP[10].x, TEMP[2].xxxx >374: MUL TEMP[0].x, TEMP[10].xxxx, IMM[6].xxxx >375: MIN TEMP[10].x, TEMP[0].xxxx, IMM[0].zzzz >376: ADD TEMP[0].x, -TEMP[10].xxxx, IMM[0].zzzz >377: DP3 TEMP[7].x, CONST[1][27].xyzz, TEMP[3].xyzz >378: DP3 TEMP[10].x, CONST[1][29].xyzz, TEMP[3].xyzz >379: MOV TEMP[7].z, TEMP[10].xxxx >380: DP3 TEMP[3].x, CONST[1][28].xyzz, TEMP[3].xyzz >381: MOV TEMP[7].y, TEMP[3].xxxx >382: DP3 TEMP[10].x, TEMP[7].xyzz, TEMP[7].xyzz >383: RSQ TEMP[10].x, TEMP[10].xxxx >384: MUL TEMP[11].xyz, TEMP[10].xxxx, TEMP[7].xyzz >385: FMA TEMP[3].x, -TEMP[3].xxxx, TEMP[10].xxxx, IMM[6].yyyy >386: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].zzzz >387: MOV_SAT TEMP[3].x, TEMP[3].xxxx >388: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx >389: DP3 TEMP[10].x, -TEMP[11].xyzz, CONST[1][36].xyzz >390: FMA TEMP[11].x, -CONST[1][31].yyyy, TEMP[10].xxxx, CONST[1][31].xxxx >391: FMA TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx, IMM[0].zzzz >392: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].zzzz >393: ABS TEMP[11].x, TEMP[11].xxxx >394: LG2 TEMP[11].x, TEMP[11].xxxx >395: MUL TEMP[11].x, TEMP[11].xxxx, IMM[6].wwww >396: EX2 TEMP[11].x, TEMP[11].xxxx >397: FMA TEMP[12].x, CONST[1][31].zzzz, TEMP[11].xxxx, -CONST[1][30].zzzz >398: MUL TEMP[11].x, TEMP[11].xxxx, CONST[1][31].zzzz >399: MAX TEMP[12].x, TEMP[12].xxxx, IMM[8].xxxx >400: FMA TEMP[0].x, -TEMP[12].xxxx, TEMP[0].xxxx, TEMP[11].xxxx >401: MAX TEMP[11].x, TEMP[0].xxxx, CONST[1][35].wwww >402: FSNE TEMP[12].x, CONST[1][30].xxxx, IMM[8].xxxx >403: UIF TEMP[12].xxxx :0 >404: RCP TEMP[12].x, CONST[1][30].xxxx >405: MUL 
TEMP[12].x, -TEMP[1].xxxx, TEMP[12].xxxx >406: ELSE :0 >407: SSG TEMP[13].x, -TEMP[1].xxxx >408: MUL TEMP[12].x, IMM[8].yyyy, TEMP[13].xxxx >409: ENDIF >410: MUL TEMP[12].x, TEMP[12].xxxx, IMM[8].zzzz >411: EX2 TEMP[12].x, TEMP[12].xxxx >412: ADD TEMP[12].x, TEMP[12].xxxx, CONST[1][31].wwww >413: MUL TEMP[12].x, TEMP[12].xxxx, CONST[1][32].yyyy >414: MUL TEMP[12].x, TEMP[12].xxxx, IMM[8].wwww >415: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[12].xxxx >416: MIN TEMP[3].x, TEMP[3].xxxx, CONST[1][30].wwww >417: MAX TEMP[3].x, TEMP[3].xxxx, CONST[1][32].xxxx >418: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[11].xxxx >419: FSNE TEMP[11].x, CONST[1][33].wwww, IMM[8].xxxx >420: UIF TEMP[11].xxxx :0 >421: RCP TEMP[11].x, CONST[1][33].wwww >422: MUL TEMP[11].x, -TEMP[1].xxxx, TEMP[11].xxxx >423: ELSE :0 >424: SSG TEMP[12].x, -TEMP[1].xxxx >425: MUL TEMP[11].x, IMM[8].yyyy, TEMP[12].xxxx >426: ENDIF >427: ADD TEMP[1].x, -TEMP[1].xxxx, CONST[1][34].zzzz >428: FSNE TEMP[12].x, CONST[1][30].yyyy, IMM[8].xxxx >429: UIF TEMP[12].xxxx :0 >430: RCP TEMP[12].x, CONST[1][30].yyyy >431: MUL TEMP[12].x, TEMP[1].xxxx, TEMP[12].xxxx >432: ELSE :0 >433: SSG TEMP[13].x, TEMP[1].xxxx >434: MUL TEMP[12].x, IMM[8].yyyy, TEMP[13].xxxx >435: ENDIF >436: MUL TEMP[11].x, TEMP[11].xxxx, IMM[8].zzzz >437: EX2 TEMP[11].x, TEMP[11].xxxx >438: MUL TEMP[7].xyz, TEMP[11].xxxx, CONST[1][33].xyzz >439: FMA TEMP[3].xyz, CONST[1][33].xyzz, TEMP[11].xxxx, TEMP[3].xxxx >440: FMA TEMP[10].xyz, TEMP[7].xyzz, TEMP[10].xxxx, TEMP[0].xxxx >441: MUL TEMP[7].xyz, TEMP[12].xxxx, -TEMP[3].xyzz >442: ABS TEMP[2].xyz, TEMP[2].xxxx >443: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[3].xyzz >444: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].zzzz >445: EX2 TEMP[2].x, TEMP[1].xxxx >446: EX2 TEMP[2].y, TEMP[1].yyyy >447: EX2 TEMP[2].z, TEMP[1].zzzz >448: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[8].zzzz >449: LG2 TEMP[11].x, CONST[1][35].xxxx >450: LG2 TEMP[11].y, CONST[1][35].yyyy >451: LG2 TEMP[11].z, CONST[1][35].zzzz >452: MUL TEMP[4].xyz, TEMP[11].xyzz, IMM[10].xxxx >453: EX2 TEMP[11].x, TEMP[4].xxxx >454: EX2 TEMP[11].y, TEMP[4].yyyy >455: EX2 TEMP[11].z, TEMP[4].zzzz >456: EX2 TEMP[4].x, TEMP[7].xxxx >457: EX2 TEMP[4].y, TEMP[7].yyyy >458: EX2 TEMP[4].z, TEMP[7].zzzz >459: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[11].xyzz >460: FSEQ TEMP[4].xyz, TEMP[3].xyzz, IMM[8].xxxx >461: SSG TEMP[11].xyz, TEMP[10].xyzz >462: MUL TEMP[11].xyz, IMM[8].yyyy, TEMP[11].xyzz >463: RCP TEMP[12].x, TEMP[3].xxxx >464: RCP TEMP[12].y, TEMP[3].yyyy >465: RCP TEMP[12].z, TEMP[3].zzzz >466: MUL TEMP[3].xyz, TEMP[10].xyzz, TEMP[12].xyzz >467: UCMP TEMP[3].xyz, TEMP[4].xyzz, TEMP[11].xyzz, TEMP[3].xyzz >468: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[7].xyzz >469: ADD TEMP[3].xyz, -TEMP[2].xyzz, IMM[0].zzzz >470: MOV TEMP[2].w, TEMP[2].xxxx >471: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, IMM[10].yyyy >472: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xxxx >473: FMA TEMP[3].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[8].wwww >474: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[3].xyzz >475: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[10].zzzz, IMM[10].wwww >476: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[11].xxxx >477: FSEQ TEMP[3].xyz, TEMP[0].xyzz, IMM[8].xxxx >478: SSG TEMP[4].xyz, TEMP[1].xyzz >479: MUL TEMP[4].xyz, IMM[8].yyyy, TEMP[4].xyzz >480: RCP TEMP[7].x, TEMP[0].xxxx >481: RCP TEMP[7].y, TEMP[0].yyyy >482: RCP TEMP[7].z, TEMP[0].zzzz >483: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[7].xyzz >484: UCMP TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >485: MOV OUT[5], IN[2] >486: MOV OUT[4], TEMP[2] >487: MOV OUT[3], TEMP[9] >488: MOV 
OUT[2], TEMP[8] >489: MOV OUT[1], TEMP[6] >490: MOV OUT[0], TEMP[5] >491: END >radeonsi: Compiling shader 363 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 380) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 396) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 408) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 412) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 424) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 428) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 432) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 436) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 448) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 452) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 472) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 488) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 492) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 496) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 500) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 504) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 508) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 540) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 552) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 556) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 560) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 564) > %64 = call float 
@llvm.SI.load.const(<16 x i8> %19, i32 568) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, i32 572) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 576) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 580) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 584) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 608) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 612) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 616) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 620) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 624) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 628) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 632) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 640) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 644) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 648) > %79 = call float @llvm.SI.load.const(<16 x i8> %19, i32 656) > %80 = call float @llvm.SI.load.const(<16 x i8> %19, i32 660) > %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 > %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 > %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %13) > %86 = extractelement <4 x float> %85, i32 0 > %87 = extractelement <4 x float> %85, i32 1 > %88 = extractelement <4 x float> %85, i32 2 > %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 > %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %14) > %92 = extractelement <4 x float> %91, i32 0 > %93 = extractelement <4 x float> %91, i32 1 > %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 > %96 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %15) > %97 = extractelement <4 x float> %96, i32 0 > %98 = extractelement <4 x float> %96, i32 1 > %99 = extractelement <4 x float> %96, i32 2 > %100 = extractelement <4 x float> %96, i32 3 > %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 > %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %16) > %104 = extractelement <4 x float> %103, i32 0 > %105 = extractelement <4 x float> %103, i32 1 > %106 = extractelement <4 x float> %103, i32 2 > %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 > %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %17) > %110 = extractelement <4 x float> %109, i32 0 > %111 = extractelement <4 x float> %109, i32 1 > %112 = extractelement <4 x float> %109, i32 2 > %113 = fmul float %112, 0x406FE01000000000 > %114 = fmul float %111, 0x406FE01000000000 > %115 = fmul float %110, 0x406FE01000000000 > %116 = fptosi float %113 to i32 > %117 = fptosi float %114 to i32 > %118 = fptosi float %115 to i32 > %119 = shl i32 %116, 1 > %120 = or i32 %119, 1 > %121 = shl i32 %117, 1 > %122 = or i32 %121, 1 > %123 = shl i32 %118, 1 > %124 = or i32 %123, 1 > %125 = shl i32 %116, 5 > %126 = or i32 %125, 4 > 
%127 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %126) > %128 = fmul float %104, %127 > %129 = shl i32 %117, 5 > %130 = or i32 %129, 4 > %131 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %130) > %132 = fmul float %105, %131 > %133 = shl i32 %120, 4 > %134 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %133) > %135 = shl i32 %120, 4 > %136 = or i32 %135, 12 > %137 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %136) > %138 = fmul float %134, %137 > %139 = shl i32 %120, 4 > %140 = or i32 %139, 4 > %141 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %140) > %142 = shl i32 %120, 4 > %143 = or i32 %142, 8 > %144 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %143) > %145 = fsub float -0.000000e+00, %138 > %146 = call float @llvm.fma.f32(float %141, float %144, float %145) > %147 = shl i32 %120, 4 > %148 = or i32 %147, 4 > %149 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %148) > %150 = shl i32 %120, 4 > %151 = or i32 %150, 8 > %152 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %151) > %153 = call float @llvm.fma.f32(float %149, float %152, float %138) > %154 = fmul float %153, %104 > %155 = fmul float %146, %104 > %156 = fmul float %155, 2.000000e+00 > %157 = shl i32 %122, 4 > %158 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %157) > %159 = shl i32 %122, 4 > %160 = or i32 %159, 12 > %161 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %160) > %162 = fmul float %158, %161 > %163 = shl i32 %122, 4 > %164 = or i32 %163, 4 > %165 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %164) > %166 = shl i32 %122, 4 > %167 = or i32 %166, 8 > %168 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %167) > %169 = fsub float -0.000000e+00, %162 > %170 = call float @llvm.fma.f32(float %165, float %168, float %169) > %171 = shl i32 %122, 4 > %172 = or i32 %171, 4 > %173 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %172) > %174 = shl i32 %122, 4 > %175 = or i32 %174, 8 > %176 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %175) > %177 = call float @llvm.fma.f32(float %173, float %176, float %162) > %178 = fmul float %177, %105 > %179 = fmul float %178, 2.000000e+00 > %180 = fmul float %170, %105 > %181 = fmul float %180, 2.000000e+00 > %182 = shl i32 %120, 4 > %183 = or i32 %182, 4 > %184 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %183) > %185 = shl i32 %120, 4 > %186 = or i32 %185, 8 > %187 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %186) > %188 = shl i32 %120, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %188) > %190 = shl i32 %120, 4 > %191 = or i32 %190, 12 > %192 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %191) > %193 = fmul float %187, %192 > %194 = fmul float %187, %189 > %195 = fmul float %184, %192 > %196 = shl i32 %120, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %196) > %198 = shl i32 %120, 4 > %199 = or i32 %198, 4 > %200 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %199) > %201 = call float @llvm.fma.f32(float %197, float %200, float %193) > %202 = fmul float %201, %104 > %203 = fmul float %202, 2.000000e+00 > %204 = shl i32 %120, 4 > %205 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %204) > %206 = shl i32 %120, 4 > %207 = or i32 %206, 4 > %208 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %207) > %209 = shl i32 %120, 4 > %210 = or i32 %209, 8 > %211 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %210) > %212 = shl i32 %120, 4 > %213 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %212) > %214 = shl i32 %120, 4 > %215 = or i32 %214, 4 > 
%216 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %215) > %217 = shl i32 %120, 4 > %218 = or i32 %217, 8 > %219 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %218) > %220 = fmul float %205, %213 > %221 = fmul float %208, %216 > %222 = fmul float %211, %219 > %223 = fadd float %222, %221 > %224 = fadd float %222, %220 > %225 = fadd float %221, %220 > %226 = fsub float -0.000000e+00, %223 > %227 = call float @llvm.fma.f32(float %226, float 2.000000e+00, float 1.000000e+00) > %228 = fsub float -0.000000e+00, %224 > %229 = call float @llvm.fma.f32(float %228, float 2.000000e+00, float 1.000000e+00) > %230 = fsub float -0.000000e+00, %225 > %231 = call float @llvm.fma.f32(float %230, float 2.000000e+00, float 1.000000e+00) > %232 = fmul float %104, %229 > %233 = shl i32 %122, 4 > %234 = or i32 %233, 4 > %235 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %234) > %236 = shl i32 %122, 4 > %237 = or i32 %236, 8 > %238 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %237) > %239 = shl i32 %122, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %239) > %241 = shl i32 %122, 4 > %242 = or i32 %241, 12 > %243 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %242) > %244 = fmul float %238, %243 > %245 = fmul float %238, %240 > %246 = fmul float %235, %243 > %247 = shl i32 %122, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %247) > %249 = shl i32 %122, 4 > %250 = or i32 %249, 4 > %251 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %250) > %252 = call float @llvm.fma.f32(float %248, float %251, float %244) > %253 = fmul float %252, %105 > %254 = fmul float %253, 2.000000e+00 > %255 = shl i32 %122, 4 > %256 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %255) > %257 = shl i32 %122, 4 > %258 = or i32 %257, 4 > %259 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %258) > %260 = shl i32 %122, 4 > %261 = or i32 %260, 8 > %262 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %261) > %263 = shl i32 %122, 4 > %264 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %263) > %265 = shl i32 %122, 4 > %266 = or i32 %265, 4 > %267 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %266) > %268 = shl i32 %122, 4 > %269 = or i32 %268, 8 > %270 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %269) > %271 = fmul float %256, %264 > %272 = fmul float %259, %267 > %273 = fmul float %262, %270 > %274 = fadd float %273, %272 > %275 = fadd float %273, %271 > %276 = fadd float %272, %271 > %277 = fsub float -0.000000e+00, %274 > %278 = call float @llvm.fma.f32(float %277, float 2.000000e+00, float 1.000000e+00) > %279 = fsub float -0.000000e+00, %275 > %280 = call float @llvm.fma.f32(float %279, float 2.000000e+00, float 1.000000e+00) > %281 = fsub float -0.000000e+00, %276 > %282 = call float @llvm.fma.f32(float %281, float 2.000000e+00, float 1.000000e+00) > %283 = fmul float %105, %280 > %284 = fadd float %203, %254 > %285 = fadd float %232, %283 > %286 = fadd float %156, %181 > %287 = fadd float %128, %132 > %288 = shl i32 %118, 5 > %289 = or i32 %288, 4 > %290 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %289) > %291 = fmul float %106, %290 > %292 = shl i32 %124, 4 > %293 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %292) > %294 = shl i32 %124, 4 > %295 = or i32 %294, 12 > %296 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %295) > %297 = fmul float %293, %296 > %298 = shl i32 %124, 4 > %299 = or i32 %298, 4 > %300 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %299) > %301 = shl i32 %124, 4 > %302 = or i32 %301, 8 > %303 = call 
float @llvm.SI.load.const(<16 x i8> %82, i32 %302) > %304 = fsub float -0.000000e+00, %297 > %305 = call float @llvm.fma.f32(float %300, float %303, float %304) > %306 = shl i32 %124, 4 > %307 = or i32 %306, 4 > %308 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %307) > %309 = shl i32 %124, 4 > %310 = or i32 %309, 8 > %311 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %310) > %312 = call float @llvm.fma.f32(float %308, float %311, float %297) > %313 = fmul float %312, %106 > %314 = fmul float %313, 2.000000e+00 > %315 = fmul float %305, %106 > %316 = fmul float %315, 2.000000e+00 > %317 = shl i32 %124, 4 > %318 = or i32 %317, 4 > %319 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %318) > %320 = shl i32 %124, 4 > %321 = or i32 %320, 8 > %322 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %321) > %323 = shl i32 %124, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %323) > %325 = shl i32 %124, 4 > %326 = or i32 %325, 12 > %327 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %326) > %328 = fmul float %322, %327 > %329 = fmul float %322, %324 > %330 = fmul float %319, %327 > %331 = shl i32 %124, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %331) > %333 = shl i32 %124, 4 > %334 = or i32 %333, 4 > %335 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %334) > %336 = call float @llvm.fma.f32(float %332, float %335, float %328) > %337 = fmul float %336, %106 > %338 = fmul float %337, 2.000000e+00 > %339 = shl i32 %124, 4 > %340 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %339) > %341 = shl i32 %124, 4 > %342 = or i32 %341, 4 > %343 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %342) > %344 = shl i32 %124, 4 > %345 = or i32 %344, 8 > %346 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %345) > %347 = shl i32 %124, 4 > %348 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %347) > %349 = shl i32 %124, 4 > %350 = or i32 %349, 4 > %351 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %350) > %352 = shl i32 %124, 4 > %353 = or i32 %352, 8 > %354 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %353) > %355 = fmul float %340, %348 > %356 = fmul float %343, %351 > %357 = fmul float %346, %354 > %358 = fadd float %357, %356 > %359 = fadd float %357, %355 > %360 = fadd float %356, %355 > %361 = fsub float -0.000000e+00, %358 > %362 = call float @llvm.fma.f32(float %361, float 2.000000e+00, float 1.000000e+00) > %363 = fsub float -0.000000e+00, %359 > %364 = call float @llvm.fma.f32(float %363, float 2.000000e+00, float 1.000000e+00) > %365 = fsub float -0.000000e+00, %360 > %366 = call float @llvm.fma.f32(float %365, float 2.000000e+00, float 1.000000e+00) > %367 = fmul float %106, %364 > %368 = fadd float %284, %338 > %369 = fadd float %285, %367 > %370 = fadd float %286, %316 > %371 = fadd float %287, %291 > %372 = fmul float %368, %86 > %373 = fmul float %369, %87 > %374 = fadd float %372, %373 > %375 = fmul float %370, %88 > %376 = fadd float %374, %375 > %377 = fadd float %376, %371 > %378 = shl i32 %120, 4 > %379 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %378) > %380 = shl i32 %120, 4 > %381 = or i32 %380, 8 > %382 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %381) > %383 = fsub float -0.000000e+00, %195 > %384 = call float @llvm.fma.f32(float %379, float %382, float %383) > %385 = fmul float %384, %104 > %386 = fmul float %385, 2.000000e+00 > %387 = fmul float %154, 2.000000e+00 > %388 = shl i32 %122, 4 > %389 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %388) > %390 = shl i32 %122, 4 > %391 = 
or i32 %390, 8 > %392 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %391) > %393 = fsub float -0.000000e+00, %246 > %394 = call float @llvm.fma.f32(float %389, float %392, float %393) > %395 = fmul float %394, %105 > %396 = fmul float %395, 2.000000e+00 > %397 = fmul float %104, %231 > %398 = fmul float %104, %227 > %399 = fmul float %105, %282 > %400 = fmul float %105, %278 > %401 = shl i32 %116, 5 > %402 = or i32 %401, 8 > %403 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %402) > %404 = fmul float %104, %403 > %405 = shl i32 %117, 5 > %406 = or i32 %405, 8 > %407 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %406) > %408 = fmul float %105, %407 > %409 = fadd float %396, %386 > %410 = fadd float %179, %387 > %411 = fadd float %399, %397 > %412 = fadd float %408, %404 > %413 = shl i32 %124, 4 > %414 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %413) > %415 = shl i32 %124, 4 > %416 = or i32 %415, 8 > %417 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %416) > %418 = fsub float -0.000000e+00, %330 > %419 = call float @llvm.fma.f32(float %414, float %417, float %418) > %420 = fmul float %419, %106 > %421 = fmul float %420, 2.000000e+00 > %422 = fmul float %106, %366 > %423 = fmul float %106, %362 > %424 = shl i32 %118, 5 > %425 = or i32 %424, 8 > %426 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %425) > %427 = fmul float %106, %426 > %428 = fadd float %409, %421 > %429 = fadd float %410, %314 > %430 = fadd float %411, %422 > %431 = fadd float %412, %427 > %432 = fmul float %428, %86 > %433 = fmul float %429, %87 > %434 = fadd float %432, %433 > %435 = fmul float %430, %88 > %436 = fadd float %434, %435 > %437 = fadd float %436, %431 > %438 = shl i32 %116, 5 > %439 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %438) > %440 = fmul float %104, %439 > %441 = shl i32 %117, 5 > %442 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %441) > %443 = fmul float %105, %442 > %444 = shl i32 %118, 5 > %445 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %444) > %446 = fmul float %106, %445 > %447 = shl i32 %120, 4 > %448 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %447) > %449 = shl i32 %120, 4 > %450 = or i32 %449, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %450) > %452 = fsub float -0.000000e+00, %193 > %453 = call float @llvm.fma.f32(float %448, float %451, float %452) > %454 = fadd float %195, %194 > %455 = fmul float %453, %104 > %456 = fmul float %454, %104 > %457 = fmul float %455, 2.000000e+00 > %458 = fmul float %456, 2.000000e+00 > %459 = shl i32 %122, 4 > %460 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %459) > %461 = shl i32 %122, 4 > %462 = or i32 %461, 4 > %463 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %462) > %464 = fsub float -0.000000e+00, %244 > %465 = call float @llvm.fma.f32(float %460, float %463, float %464) > %466 = shl i32 %124, 4 > %467 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %466) > %468 = shl i32 %124, 4 > %469 = or i32 %468, 4 > %470 = call float @llvm.SI.load.const(<16 x i8> %82, i32 %469) > %471 = fsub float -0.000000e+00, %328 > %472 = call float @llvm.fma.f32(float %467, float %470, float %471) > %473 = fadd float %330, %329 > %474 = fmul float %465, %105 > %475 = fmul float %472, %106 > %476 = fmul float %473, %106 > %477 = fmul float %475, 2.000000e+00 > %478 = fmul float %476, 2.000000e+00 > %479 = fadd float %246, %245 > %480 = fmul float %479, %105 > %481 = fmul float %474, 2.000000e+00 > %482 = fmul float %480, 2.000000e+00 > %483 = fadd float %398, %400 > %484 = 
fadd float %457, %481 > %485 = fadd float %458, %482 > %486 = fadd float %440, %443 > %487 = fadd float %423, %483 > %488 = fadd float %477, %484 > %489 = fadd float %478, %485 > %490 = fadd float %446, %486 > %491 = fmul float %487, %86 > %492 = fmul float %488, %87 > %493 = fadd float %491, %492 > %494 = fmul float %489, %88 > %495 = fadd float %493, %494 > %496 = fadd float %495, %490 > %497 = fmul float %20, %496 > %498 = fmul float %21, %377 > %499 = fadd float %497, %498 > %500 = fmul float %22, %437 > %501 = fadd float %499, %500 > %502 = fadd float %501, %23 > %503 = fmul float %24, %496 > %504 = fmul float %25, %377 > %505 = fadd float %503, %504 > %506 = fmul float %26, %437 > %507 = fadd float %505, %506 > %508 = fadd float %507, %27 > %509 = fmul float %28, %496 > %510 = fmul float %29, %377 > %511 = fadd float %509, %510 > %512 = fmul float %30, %437 > %513 = fadd float %511, %512 > %514 = fadd float %513, %31 > %515 = fmul float %32, %496 > %516 = fmul float %33, %377 > %517 = fadd float %515, %516 > %518 = fmul float %34, %437 > %519 = fadd float %517, %518 > %520 = fadd float %519, %35 > %521 = fsub float %76, %496 > %522 = fsub float %77, %377 > %523 = fsub float %78, %437 > %524 = fmul float %39, %496 > %525 = fmul float %40, %377 > %526 = fadd float %524, %525 > %527 = fmul float %41, %437 > %528 = fadd float %526, %527 > %529 = fadd float %528, %42 > %530 = fadd float %529, %61 > %531 = fmul float %520, %79 > %532 = fmul float %520, %80 > %533 = fsub float -0.000000e+00, %80 > %534 = call float @llvm.fma.f32(float %502, float %79, float %531) > %535 = call float @llvm.fma.f32(float %508, float %533, float %532) > %536 = fmul float %69, %73 > %537 = fmul float %70, %74 > %538 = fmul float %71, %75 > %539 = call float @llvm.fabs.f32(float %520) > %540 = fmul float %539, 0x3EF4F8B580000000 > %541 = call float @llvm.minnum.f32(float %540, float 1.000000e+00) > %542 = fsub float 1.000000e+00, %541 > %543 = fmul float %36, %521 > %544 = fmul float %37, %522 > %545 = fadd float %544, %543 > %546 = fmul float %38, %523 > %547 = fadd float %545, %546 > %548 = fmul float %43, %521 > %549 = fmul float %44, %522 > %550 = fadd float %549, %548 > %551 = fmul float %45, %523 > %552 = fadd float %550, %551 > %553 = fmul float %39, %521 > %554 = fmul float %40, %522 > %555 = fadd float %554, %553 > %556 = fmul float %41, %523 > %557 = fadd float %555, %556 > %558 = fmul float %547, %547 > %559 = fmul float %557, %557 > %560 = fadd float %559, %558 > %561 = fmul float %552, %552 > %562 = fadd float %560, %561 > %563 = call float @llvm.AMDGPU.rsq.clamped.f32(float %562) > %564 = fmul float %563, %547 > %565 = fmul float %563, %557 > %566 = fmul float %563, %552 > %567 = fsub float -0.000000e+00, %557 > %568 = call float @llvm.fma.f32(float %567, float %563, float 0xBFC3333340000000) > %569 = fsub float 1.000000e+00, %568 > %570 = call float @llvm.AMDGPU.clamp.(float %569, float 0.000000e+00, float 1.000000e+00) > %571 = fmul float %570, %570 > %572 = fmul float %564, %66 > %573 = fsub float -0.000000e+00, %572 > %574 = fmul float %565, %67 > %575 = fsub float %573, %574 > %576 = fmul float %566, %68 > %577 = fsub float %575, %576 > %578 = fsub float -0.000000e+00, %51 > %579 = call float @llvm.fma.f32(float %578, float %577, float %50) > %580 = call float @llvm.fma.f32(float %577, float %577, float 1.000000e+00) > %581 = fmul float %580, 0x3FAE8EC8A0000000 > %582 = call float @llvm.fabs.f32(float %579) > %583 = call float @llvm.log2.f32(float %582) > %584 = fmul float %583, -1.500000e+00 
> %585 = call float @llvm.exp2.f32(float %584) > %586 = fsub float -0.000000e+00, %48 > %587 = call float @llvm.fma.f32(float %52, float %585, float %586) > %588 = fmul float %585, %52 > %589 = call float @llvm.maxnum.f32(float %587, float 0.000000e+00) > %590 = fsub float -0.000000e+00, %589 > %591 = call float @llvm.fma.f32(float %590, float %542, float %588) > %592 = call float @llvm.maxnum.f32(float %591, float %65) > %593 = fcmp une float %46, 0.000000e+00 > br i1 %593, label %IF, label %ELSE > >IF: ; preds = %main_body > %594 = fdiv float 1.000000e+00, %46 > %595 = fmul float %530, %594 > %596 = fsub float -0.000000e+00, %595 > br label %ENDIF > >ELSE: ; preds = %main_body > %597 = fsub float -0.000000e+00, %530 > %598 = fcmp olt float %530, -0.000000e+00 > %599 = select i1 %598, float 1.000000e+00, float %597 > %600 = fcmp oge float %599, 0.000000e+00 > %.op = fmul float %599, 0x4600000000000000 > %601 = select i1 %600, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp48.0 = phi float [ %596, %IF ], [ %601, %ELSE ] > %602 = fmul float %temp48.0, 0x3FF7154760000000 > %603 = call float @llvm.exp2.f32(float %602) > %604 = fadd float %603, %53 > %605 = fmul float %604, %55 > %606 = fmul float %605, 5.000000e-01 > %607 = fmul float %571, %606 > %608 = call float @llvm.minnum.f32(float %607, float %49) > %609 = call float @llvm.maxnum.f32(float %608, float %54) > %610 = fmul float %609, %592 > %611 = fcmp une float %59, 0.000000e+00 > br i1 %611, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %612 = fdiv float 1.000000e+00, %59 > %613 = fmul float %530, %612 > %614 = fsub float -0.000000e+00, %613 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %615 = fsub float -0.000000e+00, %530 > %616 = fcmp olt float %530, -0.000000e+00 > %617 = select i1 %616, float 1.000000e+00, float %615 > %618 = fcmp oge float %617, 0.000000e+00 > %.op164 = fmul float %617, 0x4600000000000000 > %619 = select i1 %618, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp44.0 = phi float [ %614, %IF159 ], [ %619, %ELSE160 ] > %620 = fsub float %60, %530 > %621 = fcmp une float %47, 0.000000e+00 > br i1 %621, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %622 = fdiv float 1.000000e+00, %47 > %623 = fmul float %620, %622 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %624 = fcmp ogt float %620, 0.000000e+00 > %625 = select i1 %624, float 1.000000e+00, float %620 > %626 = fcmp oge float %625, 0.000000e+00 > %.op165 = fmul float %625, 0x4600000000000000 > %627 = select i1 %626, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp48.1 = phi float [ %623, %IF162 ], [ %627, %ELSE163 ] > %628 = fmul float %temp44.0, 0x3FF7154760000000 > %629 = call float @llvm.exp2.f32(float %628) > %630 = fmul float %629, %56 > %631 = fmul float %629, %57 > %632 = fmul float %629, %58 > %633 = call float @llvm.fma.f32(float %56, float %629, float %609) > %634 = call float @llvm.fma.f32(float %57, float %629, float %609) > %635 = call float @llvm.fma.f32(float %58, float %629, float %609) > %636 = call float @llvm.fma.f32(float %630, float %581, float %610) > %637 = call float @llvm.fma.f32(float %631, float %581, float %610) > %638 = call float @llvm.fma.f32(float %632, float %581, float %610) > %639 = fmul float %633, %temp48.1 > %640 = fmul float %634, %temp48.1 > %641 = fmul float %635, %temp48.1 > %642 = call float @llvm.fabs.f32(float %520) > 
%643 = call float @llvm.fabs.f32(float %520) > %644 = call float @llvm.fabs.f32(float %520) > %645 = fmul float %633, %642 > %646 = fmul float %634, %643 > %647 = fmul float %635, %644 > %648 = fmul float %645, 0xBFF7154760000000 > %649 = fmul float %646, 0xBFF7154760000000 > %650 = fmul float %647, 0xBFF7154760000000 > %651 = call float @llvm.exp2.f32(float %648) > %652 = call float @llvm.exp2.f32(float %649) > %653 = call float @llvm.exp2.f32(float %650) > %654 = fmul float %639, 0xBFF7154760000000 > %655 = fmul float %640, 0xBFF7154760000000 > %656 = fmul float %641, 0xBFF7154760000000 > %657 = call float @llvm.log2.f32(float %62) > %658 = call float @llvm.log2.f32(float %63) > %659 = call float @llvm.log2.f32(float %64) > %660 = fmul float %657, 0x3FDD1745E0000000 > %661 = fmul float %658, 0x3FDD1745E0000000 > %662 = fmul float %659, 0x3FDD1745E0000000 > %663 = call float @llvm.exp2.f32(float %660) > %664 = call float @llvm.exp2.f32(float %661) > %665 = call float @llvm.exp2.f32(float %662) > %666 = call float @llvm.exp2.f32(float %654) > %667 = call float @llvm.exp2.f32(float %655) > %668 = call float @llvm.exp2.f32(float %656) > %669 = fmul float %666, %663 > %670 = fmul float %667, %664 > %671 = fmul float %668, %665 > %672 = fcmp oeq float %633, 0.000000e+00 > %673 = fcmp oeq float %634, 0.000000e+00 > %674 = fcmp oeq float %635, 0.000000e+00 > %675 = fcmp ogt float %636, 0.000000e+00 > %676 = select i1 %675, float 1.000000e+00, float %636 > %677 = fcmp oge float %676, 0.000000e+00 > %678 = fcmp ogt float %637, 0.000000e+00 > %679 = select i1 %678, float 1.000000e+00, float %637 > %680 = fcmp oge float %679, 0.000000e+00 > %681 = fcmp ogt float %638, 0.000000e+00 > %682 = select i1 %681, float 1.000000e+00, float %638 > %683 = fcmp oge float %682, 0.000000e+00 > %.op166 = fmul float %676, 0x4600000000000000 > %684 = select i1 %677, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %679, 0x4600000000000000 > %685 = select i1 %680, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %682, 0x4600000000000000 > %686 = select i1 %683, float %.op168, float 0xC600000000000000 > %687 = fdiv float 1.000000e+00, %633 > %688 = fdiv float 1.000000e+00, %634 > %689 = fdiv float 1.000000e+00, %635 > %690 = fmul float %636, %687 > %691 = fmul float %637, %688 > %692 = fmul float %638, %689 > %693 = select i1 %672, float %684, float %690 > %694 = select i1 %673, float %685, float %691 > %695 = select i1 %674, float %686, float %692 > %696 = fmul float %693, %669 > %697 = fmul float %694, %670 > %698 = fmul float %695, %671 > %699 = fsub float 1.000000e+00, %651 > %700 = fsub float 1.000000e+00, %652 > %701 = fsub float 1.000000e+00, %653 > %702 = call float @llvm.fma.f32(float %696, float %699, float 0xBF70624DE0000000) > %703 = call float @llvm.fma.f32(float %697, float %700, float 0xBF70624DE0000000) > %704 = call float @llvm.fma.f32(float %698, float %701, float 0xBF70624DE0000000) > %705 = call float @llvm.maxnum.f32(float %702, float 0.000000e+00) > %706 = call float @llvm.maxnum.f32(float %703, float 0.000000e+00) > %707 = call float @llvm.maxnum.f32(float %704, float 0.000000e+00) > %708 = call float @llvm.fma.f32(float %705, float 0x4018CCCCC0000000, float 5.000000e-01) > %709 = call float @llvm.fma.f32(float %706, float 0x4018CCCCC0000000, float 5.000000e-01) > %710 = call float @llvm.fma.f32(float %707, float 0x4018CCCCC0000000, float 5.000000e-01) > %711 = fmul float %705, %708 > %712 = fmul float %706, %709 > %713 = fmul float %707, %710 > %714 = call float 
@llvm.fma.f32(float %705, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %715 = call float @llvm.fma.f32(float %706, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %716 = call float @llvm.fma.f32(float %707, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %717 = call float @llvm.fma.f32(float %705, float %714, float 0x3FAEB851E0000000) > %718 = call float @llvm.fma.f32(float %706, float %715, float 0x3FAEB851E0000000) > %719 = call float @llvm.fma.f32(float %707, float %716, float 0x3FAEB851E0000000) > %720 = fcmp oeq float %717, 0.000000e+00 > %721 = fcmp oeq float %718, 0.000000e+00 > %722 = fcmp oeq float %719, 0.000000e+00 > %723 = fcmp ogt float %711, 0.000000e+00 > %724 = select i1 %723, float 1.000000e+00, float %711 > %725 = fcmp oge float %724, 0.000000e+00 > %726 = fcmp ogt float %712, 0.000000e+00 > %727 = select i1 %726, float 1.000000e+00, float %712 > %728 = fcmp oge float %727, 0.000000e+00 > %729 = fcmp ogt float %713, 0.000000e+00 > %730 = select i1 %729, float 1.000000e+00, float %713 > %731 = fcmp oge float %730, 0.000000e+00 > %.op169 = fmul float %724, 0x4600000000000000 > %732 = select i1 %725, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %727, 0x4600000000000000 > %733 = select i1 %728, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %730, 0x4600000000000000 > %734 = select i1 %731, float %.op171, float 0xC600000000000000 > %735 = fdiv float 1.000000e+00, %717 > %736 = fdiv float 1.000000e+00, %718 > %737 = fdiv float 1.000000e+00, %719 > %738 = fmul float %711, %735 > %739 = fmul float %712, %736 > %740 = fmul float %713, %737 > %741 = select i1 %720, float %732, float %738 > %742 = select i1 %721, float %733, float %739 > %743 = select i1 %722, float %734, float %740 > %744 = bitcast i32 %11 to float > %745 = insertvalue <{ float, float, float }> undef, float %744, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %92, float %93, float %426, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %534, float %535, float %514, float %520) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %536, float %537, float %538, float %72) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %741, float %742, float %743, float %651) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %97, float %98, float %99, float %100) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %502, float %508, float %514, float %520) > ret <{ float, float, float }> %745 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > 
>attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..37] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 240, 272, 352} >IMM[2] UINT32 {288, 336, 256, 304} >IMM[3] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} >IMM[4] UINT32 {320, 592, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MUL TEMP[1].x, CONST[1][17].wwww, CONST[1][22].wwww > 26: MUL TEMP[2].x, CONST[1][18].xxxx, CONST[1][22].wwww > 27: MOV TEMP[1].y, TEMP[2].xxxx > 28: FMA TEMP[2].xy, IN[0].xyyy, CONST[1][21].xyyy, CONST[1][16].yzzz > 29: ADD TEMP[2].xy, TEMP[2].xyyy, TEMP[1].xyyy > 30: MOV TEMP[2].xy, TEMP[2].xyyy > 31: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D > 32: FMA TEMP[2].xy, TEMP[2].ywww, IMM[0].wwww, IMM[3].xxxx > 33: MOV TEMP[1].x, TEMP[2].xyxx > 34: MOV TEMP[1].z, -TEMP[2].yyyy > 35: MUL TEMP[2].xy, TEMP[1].xzzz, CONST[1][22].xyyy > 36: FMA TEMP[3].xy, IN[0].xyyy, CONST[1][18].yzzz, TEMP[2].xyyy > 37: FMA TEMP[2].xy, IN[0].xyyy, CONST[1][19].xyyy, TEMP[2].xyyy > 38: FMA TEMP[2].xy, CONST[1][22].wwww, CONST[1][17].yzzz, TEMP[2].xyyy > 39: MOV TEMP[2].xy, TEMP[2].xyyy > 40: TEX TEMP[2], TEMP[2], SAMP[2], 2D > 41: MUL TEMP[4].x, CONST[1][16].wwww, CONST[1][22].wwww > 42: MUL TEMP[5].x, CONST[1][17].xxxx, CONST[1][22].wwww > 43: MOV TEMP[4].y, TEMP[5].xxxx > 44: ADD TEMP[3].xy, TEMP[3].xyyy, TEMP[4].xyyy > 45: MOV TEMP[3].xy, TEMP[3].xyyy > 46: TEX TEMP[3], TEMP[3], SAMP[3], 2D > 47: MUL TEMP[1], TEMP[2], TEMP[3] > 48: MUL TEMP[1], TEMP[1], CONST[1][20] > 49: MUL TEMP[2].x, TEMP[0].xxxx, TEMP[1].wwww > 50: MOV TEMP[1].w, TEMP[2].xxxx > 51: MUL TEMP[0], TEMP[1], IN[4] > 52: MUL TEMP[0], TEMP[0], IN[2] > 53: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 54: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 55: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][37].xyzz > 56: MOV TEMP[0].xyz, TEMP[0].xyzx > 57: MUL TEMP[1].x, TEMP[1].xxxx, 
CONST[1][16].xxxx > 58: MOV TEMP[0].w, TEMP[1].xxxx > 59: MOV OUT[0], TEMP[0] > 60: END >radeonsi: Compiling shader 364 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 356) > %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 364) > %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 592) > %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 596) > %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 600) > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 3 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 7 > %65 = load <4 x i32>, <4 x 
i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 > %72 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 11 > %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 > %75 = extractelement <8 x i32> %71, i32 7 > %76 = extractelement <4 x i32> %74, i32 0 > %77 = and i32 %76, %75 > %78 = insertelement <4 x i32> %74, i32 %77, i32 0 > %79 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 > %81 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %82 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %81, i64 0, i64 15 > %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 > %84 = extractelement <8 x i32> %80, i32 7 > %85 = extractelement <4 x i32> %83, i32 0 > %86 = and i32 %85, %84 > %87 = insertelement <4 x i32> %83, i32 %86, i32 0 > %88 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %99 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %100 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %101 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %102 = fcmp oeq float %92, 0.000000e+00 > %103 = fcmp oeq float %92, 0.000000e+00 > %104 = fcmp ogt float %90, 0.000000e+00 > %105 = select i1 %104, float 1.000000e+00, float %90 > %106 = fcmp oge float %105, 0.000000e+00 > %107 = fcmp ogt float %91, 0.000000e+00 > %108 = select i1 %107, float 1.000000e+00, float %91 > %109 = fcmp oge float %108, 0.000000e+00 > %.op = fmul float %105, 0x4600000000000000 > %110 = select i1 %106, float %.op, float 0xC600000000000000 > %.op24 = fmul float %108, 0x4600000000000000 > %111 = select i1 %109, float %.op24, float 0xC600000000000000 > %112 = fdiv float 1.000000e+00, %92 > %113 = fmul float %90, %112 > %114 = fmul float %91, %112 > %115 = select i1 %102, float %110, float %113 > %116 = select i1 %103, float %111, float %114 > %117 = bitcast float %115 to i32 > %118 = bitcast float %116 to i32 > %119 = insertelement <2 x i32> undef, i32 %117, i32 0 > %120 = insertelement <2 x i32> %119, i32 %118, i32 1 > %121 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %120, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %122 = extractelement <4 x float> %121, i32 0 > %123 = 
fsub float %92, %122 > %124 = fcmp une float %25, 0.000000e+00 > %125 = call float @llvm.fabs.f32(float %123) > br i1 %124, label %IF, label %ELSE > >IF: ; preds = %main_body > %126 = fdiv float 1.000000e+00, %25 > %127 = fmul float %125, %126 > br label %ENDIF > >ELSE: ; preds = %main_body > %128 = fcmp one float %123, 0.000000e+00 > %129 = select i1 %128, float 1.000000e+00, float %125 > %130 = fcmp oge float %129, 0.000000e+00 > %.op25 = fmul float %129, 0x4600000000000000 > %131 = select i1 %130, float %.op25, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %127, %IF ], [ %131, %ELSE ] > %132 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %133 = fsub float 1.000000e+00, %132 > %134 = call float @llvm.log2.f32(float %133) > %135 = fmul float %134, %26 > %136 = call float @llvm.exp2.f32(float %135) > %137 = fsub float 1.000000e+00, %136 > %138 = fmul float %34, %48 > %139 = fmul float %35, %48 > %140 = call float @llvm.fma.f32(float %88, float %44, float %28) > %141 = call float @llvm.fma.f32(float %89, float %45, float %29) > %142 = fadd float %140, %138 > %143 = fadd float %141, %139 > %144 = bitcast float %142 to i32 > %145 = bitcast float %143 to i32 > %146 = insertelement <2 x i32> undef, i32 %144, i32 0 > %147 = insertelement <2 x i32> %146, i32 %145, i32 1 > %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %149 = extractelement <4 x float> %148, i32 1 > %150 = extractelement <4 x float> %148, i32 3 > %151 = call float @llvm.fma.f32(float %149, float 2.000000e+00, float -1.000000e+00) > %152 = call float @llvm.fma.f32(float %150, float 2.000000e+00, float -1.000000e+00) > %153 = fmul float %151, %46 > %154 = fmul float %152, %47 > %155 = fsub float -0.000000e+00, %154 > %156 = call float @llvm.fma.f32(float %88, float %36, float %153) > %157 = call float @llvm.fma.f32(float %89, float %37, float %155) > %158 = call float @llvm.fma.f32(float %88, float %38, float %153) > %159 = call float @llvm.fma.f32(float %89, float %39, float %155) > %160 = call float @llvm.fma.f32(float %48, float %32, float %158) > %161 = call float @llvm.fma.f32(float %48, float %33, float %159) > %162 = bitcast float %160 to i32 > %163 = bitcast float %161 to i32 > %164 = insertelement <2 x i32> undef, i32 %162, i32 0 > %165 = insertelement <2 x i32> %164, i32 %163, i32 1 > %166 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %165, <8 x i32> %71, <4 x i32> %78, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %167 = extractelement <4 x float> %166, i32 0 > %168 = extractelement <4 x float> %166, i32 1 > %169 = extractelement <4 x float> %166, i32 2 > %170 = extractelement <4 x float> %166, i32 3 > %171 = fmul float %30, %48 > %172 = fmul float %31, %48 > %173 = fadd float %156, %171 > %174 = fadd float %157, %172 > %175 = bitcast float %173 to i32 > %176 = bitcast float %174 to i32 > %177 = insertelement <2 x i32> undef, i32 %175, i32 0 > %178 = insertelement <2 x i32> %177, i32 %176, i32 1 > %179 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %178, <8 x i32> %80, <4 x i32> %87, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %180 = extractelement <4 x float> %179, i32 0 > %181 = extractelement <4 x float> %179, i32 1 > %182 = extractelement <4 x float> %179, i32 2 > %183 = extractelement <4 x float> %179, i32 3 > %184 = fmul float %167, %180 > %185 = fmul float %168, 
%181 > %186 = fmul float %169, %182 > %187 = fmul float %170, %183 > %188 = fmul float %184, %40 > %189 = fmul float %185, %41 > %190 = fmul float %186, %42 > %191 = fmul float %187, %43 > %192 = fmul float %137, %191 > %193 = fmul float %188, %98 > %194 = fmul float %189, %99 > %195 = fmul float %190, %100 > %196 = fmul float %192, %101 > %197 = fmul float %193, %93 > %198 = fmul float %194, %94 > %199 = fmul float %195, %95 > %200 = fmul float %196, %96 > %201 = fmul float %197, %97 > %202 = fmul float %198, %97 > %203 = fmul float %199, %97 > %204 = fmul float %200, %201 > %205 = fmul float %200, %202 > %206 = fmul float %200, %203 > %207 = fmul float %204, %49 > %208 = fmul float %205, %50 > %209 = fadd float %208, %207 > %210 = fmul float %206, %51 > %211 = fadd float %209, %210 > %212 = fmul float %211, %27 > %213 = bitcast float %5 to i32 > %214 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %213, 10 > %215 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %214, float %204, 11 > %216 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %215, float %205, 12 > %217 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %216, float %206, 13 > %218 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %217, float %212, 14 > %219 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %218, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %219 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..41] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 
1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 368, 384, 400} >IMM[2] UINT32 {416, 448, 544, 656} >IMM[3] UINT32 {640, 608, 624, 432} >IMM[4] UINT32 {464, 576, 496, 480} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {560, 512, 528, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][23], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][24], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][25], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][26], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][28], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][34].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][41].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][41].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][40].xyzz > 18: MUL TEMP[8].xyz, CONST[1][38].xyzz, CONST[1][39].xyzz > 19: MOV TEMP[8].w, CONST[1][38].wwww > 20: DP3 TEMP[1].x, CONST[1][27].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][29].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][28].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][36].xyzz > 33: FMA TEMP[10].x, -CONST[1][31].yyyy, TEMP[9].xxxx, CONST[1][31].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][31].zzzz, TEMP[10].xxxx, -CONST[1][30].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][31].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][35].wwww > 50: FSNE TEMP[11].x, CONST[1][30].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][30].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][31].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][32].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][30].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][32].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][33].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][33].wwww > 70: MUL TEMP[11].x, 
-TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][34].zzzz > 76: FSNE TEMP[12].x, CONST[1][30].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][30].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][33].xyzz > 87: FMA TEMP[7].xyz, CONST[1][33].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][35].xxxx >106: LG2 TEMP[7].y, CONST[1][35].yyyy >107: LG2 TEMP[7].z, CONST[1][35].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 365 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float 
@llvm.SI.load.const(<16 x i8> %17, i32 368) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 540) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 552) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 556) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 572) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 608) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 612) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 616) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 620) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 624) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 628) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 632) > %74 = call float @llvm.SI.load.const(<16 x 
i8> %17, i32 640) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 644) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 648) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 656) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 660) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call 
float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 
0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub 
float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: 
nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..38] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 320, 336, 352} >IMM[2] UINT32 {368, 400, 496, 608} >IMM[3] UINT32 {592, 560, 576, 384} >IMM[4] UINT32 {416, 528, 448, 432} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {512, 464, 480, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][20], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][22], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][23], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][25], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][31].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][38].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][38].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][37].xyzz > 18: MUL TEMP[8].xyz, CONST[1][35].xyzz, CONST[1][36].xyzz > 19: MOV TEMP[8].w, CONST[1][35].wwww > 20: DP3 TEMP[1].x, CONST[1][24].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][26].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][25].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][33].xyzz > 33: FMA TEMP[10].x, -CONST[1][28].yyyy, TEMP[9].xxxx, CONST[1][28].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 
39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][28].zzzz, TEMP[10].xxxx, -CONST[1][27].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][28].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][32].wwww > 50: FSNE TEMP[11].x, CONST[1][27].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][27].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][28].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][29].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][27].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][29].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][30].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][30].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][31].zzzz > 76: FSNE TEMP[12].x, CONST[1][27].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][27].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][30].xyzz > 87: FMA TEMP[7].xyz, CONST[1][30].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][32].xxxx >106: LG2 TEMP[7].y, CONST[1][32].yyyy >107: LG2 TEMP[7].z, CONST[1][32].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww 
>121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 366 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %52 
= call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 524) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 560) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 564) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 568) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 572) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 576) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 580) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 584) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 592) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 596) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 600) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 608) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 612) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul 
float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 
= call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = 
fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 
0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..34] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 2.0000} >IMM[1] UINT32 {0, 240, 288, 256} >IMM[2] UINT32 {304, 272, 544, 0} >IMM[3] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP 
TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: FMA TEMP[1].xy, IN[0].xyyy, CONST[1][18].xyyy, CONST[1][16].xyyy > 26: FMA TEMP[1].xy, CONST[1][19].wwww, CONST[1][16].zwww, TEMP[1].xyyy > 27: MOV TEMP[1].xy, TEMP[1].xyyy > 28: TEX TEMP[1].yw, TEMP[1], SAMP[1], 2D > 29: FMA TEMP[1].xy, TEMP[1].ywww, IMM[0].wwww, IMM[3].xxxx > 30: MOV TEMP[2].x, TEMP[1].xyxx > 31: MOV TEMP[2].z, -TEMP[1].yyyy > 32: MUL TEMP[1].xy, TEMP[2].xzzz, CONST[1][19].xyyy > 33: FMA TEMP[1].xy, IN[0].xyyy, CONST[1][17].xyyy, TEMP[1].xyyy > 34: MOV TEMP[1].xy, TEMP[1].xyyy > 35: TEX TEMP[1], TEMP[1], SAMP[2], 2D > 36: MOV TEMP[2].xyz, TEMP[1].xyzx > 37: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 38: MOV TEMP[2].w, TEMP[1].xxxx > 39: MUL TEMP[0], TEMP[2], IN[4] > 40: MUL TEMP[0], TEMP[0], IN[2] > 41: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].wwww > 42: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 43: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][34].xyzz > 44: MOV TEMP[1].w, TEMP[1].xxxx > 45: MOV TEMP[1].xyz, TEMP[0].xyzx > 46: MOV OUT[0], TEMP[1] > 47: END >radeonsi: Compiling shader 367 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 544) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 548) > %40 = 
call float @llvm.SI.load.const(<16 x i8> %24, i32 552) > %41 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 > %43 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %44 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %43, i64 0, i64 3 > %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0 > %46 = extractelement <8 x i32> %42, i32 7 > %47 = extractelement <4 x i32> %45, i32 0 > %48 = and i32 %47, %46 > %49 = insertelement <4 x i32> %45, i32 %48, i32 0 > %50 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 > %52 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %53 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %52, i64 0, i64 7 > %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 > %55 = extractelement <8 x i32> %51, i32 7 > %56 = extractelement <4 x i32> %54, i32 0 > %57 = and i32 %56, %55 > %58 = insertelement <4 x i32> %54, i32 %57, i32 0 > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 11 > %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %72 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %73 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %74 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %75 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %76 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %77 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %78 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %79 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %80 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %81 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %82 = fcmp oeq float %72, 0.000000e+00 > %83 = fcmp oeq float %72, 0.000000e+00 > %84 = fcmp ogt float %70, 0.000000e+00 > %85 = select i1 %84, float 1.000000e+00, float %70 > %86 = fcmp oge float %85, 0.000000e+00 > %87 = fcmp ogt float %71, 0.000000e+00 > %88 = select i1 %87, float 1.000000e+00, float %71 > %89 = fcmp oge float %88, 0.000000e+00 > %.op = fmul float %85, 0x4600000000000000 > %90 = select i1 %86, float %.op, float 0xC600000000000000 > %.op12 = fmul float %88, 0x4600000000000000 > %91 = select i1 %89, float %.op12, float 0xC600000000000000 > %92 = fdiv float 1.000000e+00, %72 > %93 = fmul float %70, %92 > %94 = fmul float %71, %92 > %95 = select i1 %82, float %90, float %93 > %96 = select i1 %83, float %91, float %94 > %97 = bitcast 
float %95 to i32 > %98 = bitcast float %96 to i32 > %99 = insertelement <2 x i32> undef, i32 %97, i32 0 > %100 = insertelement <2 x i32> %99, i32 %98, i32 1 > %101 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %100, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %102 = extractelement <4 x float> %101, i32 0 > %103 = fsub float %72, %102 > %104 = fcmp une float %25, 0.000000e+00 > %105 = call float @llvm.fabs.f32(float %103) > br i1 %104, label %IF, label %ELSE > >IF: ; preds = %main_body > %106 = fdiv float 1.000000e+00, %25 > %107 = fmul float %105, %106 > br label %ENDIF > >ELSE: ; preds = %main_body > %108 = fcmp one float %103, 0.000000e+00 > %109 = select i1 %108, float 1.000000e+00, float %105 > %110 = fcmp oge float %109, 0.000000e+00 > %.op13 = fmul float %109, 0x4600000000000000 > %111 = select i1 %110, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %107, %IF ], [ %111, %ELSE ] > %112 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %113 = fsub float 1.000000e+00, %112 > %114 = call float @llvm.log2.f32(float %113) > %115 = fmul float %114, %26 > %116 = call float @llvm.exp2.f32(float %115) > %117 = fsub float 1.000000e+00, %116 > %118 = call float @llvm.fma.f32(float %68, float %33, float %27) > %119 = call float @llvm.fma.f32(float %69, float %34, float %28) > %120 = call float @llvm.fma.f32(float %37, float %29, float %118) > %121 = call float @llvm.fma.f32(float %37, float %30, float %119) > %122 = bitcast float %120 to i32 > %123 = bitcast float %121 to i32 > %124 = insertelement <2 x i32> undef, i32 %122, i32 0 > %125 = insertelement <2 x i32> %124, i32 %123, i32 1 > %126 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %125, <8 x i32> %51, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %127 = extractelement <4 x float> %126, i32 1 > %128 = extractelement <4 x float> %126, i32 3 > %129 = call float @llvm.fma.f32(float %127, float 2.000000e+00, float -1.000000e+00) > %130 = call float @llvm.fma.f32(float %128, float 2.000000e+00, float -1.000000e+00) > %131 = fmul float %129, %35 > %132 = fmul float %130, %36 > %133 = fsub float -0.000000e+00, %132 > %134 = call float @llvm.fma.f32(float %68, float %31, float %131) > %135 = call float @llvm.fma.f32(float %69, float %32, float %133) > %136 = bitcast float %134 to i32 > %137 = bitcast float %135 to i32 > %138 = insertelement <2 x i32> undef, i32 %136, i32 0 > %139 = insertelement <2 x i32> %138, i32 %137, i32 1 > %140 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %139, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 0 > %142 = extractelement <4 x float> %140, i32 1 > %143 = extractelement <4 x float> %140, i32 2 > %144 = extractelement <4 x float> %140, i32 3 > %145 = fmul float %117, %144 > %146 = fmul float %141, %78 > %147 = fmul float %142, %79 > %148 = fmul float %143, %80 > %149 = fmul float %145, %81 > %150 = fmul float %146, %73 > %151 = fmul float %147, %74 > %152 = fmul float %148, %75 > %153 = fmul float %149, %76 > %154 = fmul float %150, %77 > %155 = fmul float %151, %77 > %156 = fmul float %152, %77 > %157 = fmul float %153, %154 > %158 = fmul float %153, %155 > %159 = fmul float %153, %156 > %160 = fmul float %157, %38 > %161 = fmul float %158, %39 > %162 = fadd float %161, %160 > %163 = fmul float %159, %40 > %164 = fadd 
float %162, %163 > %165 = bitcast float %5 to i32 > %166 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %165, 10 > %167 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %166, float %157, 11 > %168 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %167, float %158, 12 > %169 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %168, float %159, 13 > %170 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %169, float %164, 14 > %171 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %170, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %171 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL CONST[1][0..21] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 272, 288, 304} >IMM[2] UINT32 {320, 240, 256, 336} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][17], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MUL TEMP[0].xyz, CONST[1][15].yzww, CONST[1][16].xxxx > 10: MUL TEMP[2], IN[1], CONST[1][21] > 11: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz > 12: MUL TEMP[2].x, TEMP[2].wwww, CONST[1][16].yyyy > 13: MOV TEMP[0].w, TEMP[2].xxxx > 14: MOV OUT[1], TEMP[0] > 15: MOV OUT[0], TEMP[1] > 16: END >radeonsi: Compiling shader 368 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> 
@main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 244) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 248) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 252) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 256) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 260) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 272) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 276) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 280) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 284) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 288) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 292) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 296) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 300) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 304) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 308) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 312) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 316) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 320) > %35 = call float @llvm.SI.load.const(<16 x i8> %16, i32 324) > %36 = call float @llvm.SI.load.const(<16 x i8> %16, i32 328) > %37 = call float @llvm.SI.load.const(<16 x i8> %16, i32 332) > %38 = call float @llvm.SI.load.const(<16 x i8> %16, i32 336) > %39 = call float @llvm.SI.load.const(<16 x i8> %16, i32 340) > %40 = call float @llvm.SI.load.const(<16 x i8> %16, i32 344) > %41 = call float @llvm.SI.load.const(<16 x i8> %16, i32 348) > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %13) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = extractelement <4 x float> %44, i32 2 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %14) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = extractelement <4 x float> %50, i32 3 > %55 = fmul float %22, %45 > %56 = fmul float %23, %46 > %57 = fadd float %55, %56 > %58 = fmul float %24, %47 > %59 = fadd float %57, %58 > %60 = fadd float %59, %25 > %61 = fmul float %26, %45 > %62 = fmul float %27, %46 > %63 = fadd float %61, %62 > %64 = fmul float %28, %47 > %65 = fadd float %63, %64 > %66 = fadd float %65, %29 > %67 = fmul float %30, %45 > %68 = fmul float %31, %46 > %69 = fadd float %67, %68 > %70 = fmul float %32, %47 > %71 = fadd float %69, %70 > %72 = fadd float %71, %33 > %73 = fmul float %34, %45 > %74 = fmul float %35, %46 > %75 = fadd float %73, %74 > %76 = fmul float %36, %47 > %77 = fadd float %75, %76 > %78 = fadd float %77, %37 > %79 = fmul float %17, %20 > %80 = 
fmul float %18, %20 > %81 = fmul float %19, %20 > %82 = fmul float %51, %38 > %83 = fmul float %52, %39 > %84 = fmul float %53, %40 > %85 = fmul float %54, %41 > %86 = fmul float %79, %82 > %87 = fmul float %80, %83 > %88 = fmul float %81, %84 > %89 = fmul float %85, %21 > %90 = bitcast i32 %11 to float > %91 = insertvalue <{ float, float, float }> undef, float %90, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %89) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %66, float %72, float %78) > ret <{ float, float, float }> %91 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} > 0: MOV OUT[0], IN[0] > 1: MOV OUT[1], IMM[0].xxxx > 2: MOV OUT[2], IMM[0].xxxx > 3: MOV OUT[3], IMM[0].xxxx > 4: END >radeonsi: Compiling shader 369 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %27 = bitcast float %5 to i32 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float 0.000000e+00, 15 > %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float 0.000000e+00, 16 > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %34, float 0.000000e+00, 17 > %36 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35, float 0.000000e+00, 18 > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %36, float 0.000000e+00, 19 > %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float 0.000000e+00, 20 > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float 0.000000e+00, 21 > %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float 0.000000e+00, 22 > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float 0.000000e+00, 23 > %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float 0.000000e+00, 24 > %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float 0.000000e+00, 25 > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float 0.000000e+00, 26 > %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..34] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 272, 288, 304} >IMM[2] UINT32 {320, 352, 448, 544} >IMM[3] UINT32 {528, 496, 512, 336} >IMM[4] UINT32 {368, 480, 400, 384} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {464, 416, 432, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][17], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][19], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][22], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][28].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][34].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][34].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][33].xyzz > 18: MUL TEMP[8].xyz, CONST[1][31].xyzz, CONST[1][32].xyzz > 19: MOV TEMP[8].w, CONST[1][31].wwww > 20: DP3 TEMP[1].x, CONST[1][21].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][23].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][22].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][30].xyzz > 33: FMA TEMP[10].x, -CONST[1][25].yyyy, TEMP[9].xxxx, CONST[1][25].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][25].zzzz, TEMP[10].xxxx, -CONST[1][24].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][25].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][29].wwww > 50: FSNE TEMP[11].x, CONST[1][24].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][24].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][25].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][26].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, 
TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][24].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][26].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][27].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][27].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][28].zzzz > 76: FSNE TEMP[12].x, CONST[1][24].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][24].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][27].xyzz > 87: FMA TEMP[7].xyz, CONST[1][27].xyzz, TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][29].xxxx >106: LG2 TEMP[7].y, CONST[1][29].yyyy >107: LG2 TEMP[7].z, CONST[1][29].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 370 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* 
byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 296) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 332) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 364) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 408) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 412) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 432) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 436) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 476) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, 
i32 504) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 508) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 536) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 544) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 548) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, 
%153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > %178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; 
preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float %210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float 
@llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > %309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float 
%341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], BUFFER, FLOAT >DCL CONST[1][0..15] >DCL TEMP[0..2], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 240, 0, 0} >IMM[2] INT32 {0, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: FSNE TEMP[1].x, CONST[1][15].zzzz, IMM[0].xxxx > 10: UIF TEMP[1].xxxx :0 > 11: ABS TEMP[1].x, TEMP[0].xxxx > 12: RCP TEMP[2].x, CONST[1][15].zzzz > 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx > 14: ELSE :0 > 15: ABS TEMP[2].x, TEMP[0].xxxx > 16: SSG TEMP[2].x, TEMP[2].xxxx > 17: MUL TEMP[1].x, IMM[0].yyyy, TEMP[2].xxxx > 18: ENDIF > 19: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 20: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 21: LG2 TEMP[1].x, TEMP[0].xxxx > 22: MUL TEMP[0].x, TEMP[1].xxxx, CONST[1][15].wwww > 23: EX2 TEMP[1].x, TEMP[0].xxxx > 24: ADD TEMP[0].x, -TEMP[1].xxxx, IMM[0].zzzz > 25: MOV TEMP[1].xy, IN[0].xyyy > 26: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 27: MOV TEMP[2].xyz, TEMP[1].xyzx > 28: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].wwww > 29: MOV TEMP[2].w, TEMP[1].xxxx > 30: MUL TEMP[0], TEMP[2], IN[4] > 31: MUL TEMP[0], TEMP[0], IN[2] > 32: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[3].wwww, IN[3].xyzz > 33: MUL TEMP[0].x, TEMP[0].wwww, IN[3].wwww > 34: MOV TEMP[0].w, TEMP[0].xxxx 
> 35: MOV TEMP[2].x, IMM[2].xxxx > 36: MOV TEMP[2].w, IMM[1].xxxx > 37: TXF TEMP[2].x, TEMP[2], SAMP[2], BUFFER > 38: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 39: MOV OUT[0], TEMP[0] > 40: END >radeonsi: Compiling shader 371 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %27 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 3 > %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 > %32 = extractelement <8 x i32> %28, i32 7 > %33 = extractelement <4 x i32> %31, i32 0 > %34 = and i32 %33, %32 > %35 = insertelement <4 x i32> %31, i32 %34, i32 0 > %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 > %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %39 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 7 > %40 = load <4 x i32>, <4 x i32> addrspace(2)* %39, align 16, !tbaa !0 > %41 = extractelement <8 x i32> %37, i32 7 > %42 = extractelement <4 x i32> %40, i32 0 > %43 = and i32 %42, %41 > %44 = insertelement <4 x i32> %40, i32 %43, i32 0 > %45 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %46 = bitcast <8 x i32> addrspace(2)* %45 to <2 x i128> addrspace(2)* > %47 = load <2 x i128>, <2 x i128> addrspace(2)* %46, align 32, !tbaa !0 > %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, 
<2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %65 = fcmp oeq float %52, 0.000000e+00 > %66 = fcmp oeq float %52, 0.000000e+00 > %67 = fcmp ogt float %50, 0.000000e+00 > %68 = select i1 %67, float 1.000000e+00, float %50 > %69 = fcmp oge float %68, 0.000000e+00 > %70 = fcmp ogt float %51, 0.000000e+00 > %71 = select i1 %70, float 1.000000e+00, float %51 > %72 = fcmp oge float %71, 0.000000e+00 > %.op = fmul float %68, 0x4600000000000000 > %73 = select i1 %69, float %.op, float 0xC600000000000000 > %.op12 = fmul float %71, 0x4600000000000000 > %74 = select i1 %72, float %.op12, float 0xC600000000000000 > %75 = fdiv float 1.000000e+00, %52 > %76 = fmul float %50, %75 > %77 = fmul float %51, %75 > %78 = select i1 %65, float %73, float %76 > %79 = select i1 %66, float %74, float %77 > %80 = bitcast float %78 to i32 > %81 = bitcast float %79 to i32 > %82 = insertelement <2 x i32> undef, i32 %80, i32 0 > %83 = insertelement <2 x i32> %82, i32 %81, i32 1 > %84 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %83, <8 x i32> %28, <4 x i32> %35, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %85 = extractelement <4 x float> %84, i32 0 > %86 = fsub float %52, %85 > %87 = fcmp une float %25, 0.000000e+00 > %88 = call float @llvm.fabs.f32(float %86) > br i1 %87, label %IF, label %ELSE > >IF: ; preds = %main_body > %89 = fdiv float 1.000000e+00, %25 > %90 = fmul float %88, %89 > br label %ENDIF > >ELSE: ; preds = %main_body > %91 = fcmp one float %86, 0.000000e+00 > %92 = select i1 %91, float 1.000000e+00, float %88 > %93 = fcmp oge float %92, 0.000000e+00 > %.op13 = fmul float %92, 0x4600000000000000 > %94 = select i1 %93, float %.op13, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp4.0 = phi float [ %90, %IF ], [ %94, %ELSE ] > %95 = call float @llvm.AMDGPU.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) > %96 = fsub float 1.000000e+00, %95 > %97 = call float @llvm.log2.f32(float %96) > %98 = fmul float %97, %26 > %99 = call float @llvm.exp2.f32(float %98) > %100 = fsub float 1.000000e+00, %99 > %101 = bitcast float %48 to i32 > %102 = bitcast float %49 to i32 > %103 = insertelement <2 x i32> undef, i32 %101, i32 0 > %104 = insertelement <2 x i32> %103, i32 %102, i32 1 > %105 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %104, <8 x i32> %37, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %106 = extractelement <4 x float> %105, i32 0 > %107 = extractelement <4 x float> %105, i32 1 > %108 = extractelement <4 x float> %105, i32 2 > %109 = extractelement <4 x float> %105, i32 3 > %110 = fmul float %100, %109 > %111 = fmul float %106, %61 > %112 = fmul float %107, %62 > %113 = fmul float %108, %63 > %114 = fmul float %110, %64 > %115 = fmul float %111, %53 > %116 = fmul float %112, %54 > %117 = fmul float %113, %55 > %118 = fmul float %114, %56 > %119 = call float @llvm.fma.f32(float %115, float %60, float %57) > %120 = call float @llvm.fma.f32(float %116, float %60, float %58) > %121 = call float @llvm.fma.f32(float %117, float %60, float %59) > %122 = fmul float %118, %60 > %123 = extractelement <2 x i128> %47, i32 1 > %124 = bitcast i128 %123 to <16 x i8> > %125 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %124, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 0 > %127 = fmul float %126, %119 
> %128 = fmul float %126, %120 > %129 = fmul float %126, %121 > %130 = bitcast float %5 to i32 > %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %130, 10 > %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %127, 11 > %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %128, 12 > %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %129, 13 > %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %122, 14 > %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..33] >DCL TEMP[0..13], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 256, 272, 288} >IMM[2] UINT32 {304, 336, 432, 528} >IMM[3] UINT32 {512, 480, 496, 320} >IMM[4] UINT32 {352, 464, 384, 368} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] UINT32 {448, 400, 416, 0} >IMM[7] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 
TEMP[2].x, CONST[1][19], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: DP4 TEMP[3].x, CONST[1][21], TEMP[0] > 10: ADD TEMP[0].x, TEMP[3].xxxx, CONST[1][27].wwww > 11: MOV TEMP[3], TEMP[1] > 12: MOV TEMP[4].xy, IN[1].xyxx > 13: MUL TEMP[5].xy, CONST[1][33].xyyy, IMM[0].xyyy > 14: MUL TEMP[6].xy, TEMP[2].xxxx, CONST[1][33].xyyy > 15: FMA TEMP[5].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 16: MOV TEMP[5].zw, TEMP[1].wwzw > 17: ADD TEMP[7].xyz, -IN[0].xyzz, CONST[1][32].xyzz > 18: MUL TEMP[8].xyz, CONST[1][30].xyzz, CONST[1][31].xyzz > 19: MOV TEMP[8].w, CONST[1][30].wwww > 20: DP3 TEMP[1].x, CONST[1][20].xyzz, TEMP[7].xyzz > 21: DP3 TEMP[9].x, CONST[1][22].xyzz, TEMP[7].xyzz > 22: MOV TEMP[1].z, TEMP[9].xxxx > 23: DP3 TEMP[7].x, CONST[1][21].xyzz, TEMP[7].xyzz > 24: MOV TEMP[1].y, TEMP[7].xxxx > 25: DP3 TEMP[9].x, TEMP[1].xyzz, TEMP[1].xyzz > 26: RSQ TEMP[9].x, TEMP[9].xxxx > 27: MUL TEMP[6].xyz, TEMP[9].xxxx, TEMP[1].xyzz > 28: FMA TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx, IMM[0].zzzz > 29: ADD TEMP[7].x, -TEMP[7].xxxx, IMM[0].xxxx > 30: MOV_SAT TEMP[7].x, TEMP[7].xxxx > 31: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx > 32: DP3 TEMP[9].x, -TEMP[6].xyzz, CONST[1][29].xyzz > 33: FMA TEMP[10].x, -CONST[1][24].yyyy, TEMP[9].xxxx, CONST[1][24].xxxx > 34: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx, IMM[0].xxxx > 35: MOV TEMP[0].z, TEMP[9].xxxx > 36: ABS TEMP[9].x, TEMP[10].xxxx > 37: LG2 TEMP[9].x, TEMP[9].xxxx > 38: MOV TEMP[0].w, TEMP[9].xxxx > 39: MUL TEMP[9].xy, TEMP[0].zwww, IMM[5].xyyy > 40: EX2 TEMP[10].x, TEMP[9].yyyy > 41: FMA TEMP[1].x, CONST[1][24].zzzz, TEMP[10].xxxx, -CONST[1][23].zzzz > 42: MUL TEMP[10].x, TEMP[10].xxxx, CONST[1][24].zzzz > 43: MAX TEMP[11].x, TEMP[1].xxxx, IMM[0].wwww > 44: ABS TEMP[12].x, TEMP[2].xxxx > 45: MUL TEMP[12].x, TEMP[12].xxxx, IMM[5].zzzz > 46: MIN TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx > 47: ADD TEMP[12].x, -TEMP[12].xxxx, IMM[0].xxxx > 48: FMA TEMP[10].x, -TEMP[11].xxxx, TEMP[12].xxxx, TEMP[10].xxxx > 49: MAX TEMP[10].x, TEMP[10].xxxx, CONST[1][28].wwww > 50: FSNE TEMP[11].x, CONST[1][23].xxxx, IMM[0].wwww > 51: UIF TEMP[11].xxxx :0 > 52: RCP TEMP[11].x, CONST[1][23].xxxx > 53: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 54: ELSE :0 > 55: SSG TEMP[12].x, -TEMP[0].xxxx > 56: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 57: ENDIF > 58: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 59: EX2 TEMP[11].x, TEMP[1].xxxx > 60: ADD TEMP[1].x, TEMP[11].xxxx, CONST[1][24].wwww > 61: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][25].yyyy > 62: MUL TEMP[1].x, TEMP[1].xxxx, IMM[7].yyyy > 63: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[1].xxxx > 64: MIN TEMP[7].x, TEMP[7].xxxx, CONST[1][23].wwww > 65: MAX TEMP[7].x, TEMP[7].xxxx, CONST[1][25].xxxx > 66: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx > 67: FSNE TEMP[11].x, CONST[1][26].wwww, IMM[0].wwww > 68: UIF TEMP[11].xxxx :0 > 69: RCP TEMP[11].x, CONST[1][26].wwww > 70: MUL TEMP[11].x, -TEMP[0].xxxx, TEMP[11].xxxx > 71: ELSE :0 > 72: SSG TEMP[12].x, -TEMP[0].xxxx > 73: MUL TEMP[11].x, IMM[5].wwww, TEMP[12].xxxx > 74: ENDIF > 75: ADD TEMP[0].x, -TEMP[0].xxxx, CONST[1][27].zzzz > 76: FSNE TEMP[12].x, CONST[1][23].yyyy, IMM[0].wwww > 77: UIF TEMP[12].xxxx :0 > 78: RCP TEMP[12].x, CONST[1][23].yyyy > 79: MUL TEMP[12].x, TEMP[0].xxxx, TEMP[12].xxxx > 80: ELSE :0 > 81: SSG TEMP[13].x, TEMP[0].xxxx > 82: MUL TEMP[12].x, IMM[5].wwww, TEMP[13].xxxx > 83: ENDIF > 84: MUL TEMP[1].x, TEMP[11].xxxx, IMM[7].xxxx > 85: EX2 TEMP[11].x, TEMP[1].xxxx > 86: MUL TEMP[6].xyz, TEMP[11].xxxx, CONST[1][26].xyzz > 87: FMA TEMP[7].xyz, CONST[1][26].xyzz, 
TEMP[11].xxxx, TEMP[7].xxxx > 88: FMA TEMP[9].xyz, TEMP[6].xyzz, TEMP[9].xxxx, TEMP[10].xxxx > 89: FSEQ TEMP[10].xyz, TEMP[7].xyzz, IMM[0].wwww > 90: SSG TEMP[11].xyz, TEMP[9].xyzz > 91: MUL TEMP[11].xyz, IMM[5].wwww, TEMP[11].xyzz > 92: RCP TEMP[13].x, TEMP[7].xxxx > 93: RCP TEMP[13].y, TEMP[7].yyyy > 94: RCP TEMP[13].z, TEMP[7].zzzz > 95: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz > 96: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz > 97: MUL TEMP[6].xyz, TEMP[12].xxxx, -TEMP[7].xyzz > 98: ABS TEMP[2].xyz, TEMP[2].xxxx > 99: MUL TEMP[1].xyz, TEMP[2].xyzz, -TEMP[7].xyzz >100: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[7].xxxx >101: EX2 TEMP[2].x, TEMP[1].xxxx >102: EX2 TEMP[2].y, TEMP[1].yyyy >103: EX2 TEMP[2].z, TEMP[1].zzzz >104: MUL TEMP[6].xyz, TEMP[6].xyzz, IMM[7].xxxx >105: LG2 TEMP[7].x, CONST[1][28].xxxx >106: LG2 TEMP[7].y, CONST[1][28].yyyy >107: LG2 TEMP[7].z, CONST[1][28].zzzz >108: MUL TEMP[7].xyz, TEMP[7].xyzz, IMM[7].zzzz >109: EX2 TEMP[10].x, TEMP[7].xxxx >110: EX2 TEMP[10].y, TEMP[7].yyyy >111: EX2 TEMP[10].z, TEMP[7].zzzz >112: EX2 TEMP[7].x, TEMP[6].xxxx >113: EX2 TEMP[7].y, TEMP[6].yyyy >114: EX2 TEMP[7].z, TEMP[6].zzzz >115: MUL TEMP[6].xyz, TEMP[7].xyzz, TEMP[10].xyzz >116: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[6].xyzz >117: ADD TEMP[7].xyz, -TEMP[2].xyzz, IMM[0].xxxx >118: MOV TEMP[2].w, TEMP[2].xxxx >119: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz, IMM[7].wwww >120: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww >121: FMA TEMP[7].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[7].yyyy >122: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[7].xyzz >123: FMA TEMP[6].xyz, TEMP[0].xyzz, IMM[8].xxxx, IMM[8].yyyy >124: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xyzz, IMM[8].zzzz >125: FSEQ TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww >126: SSG TEMP[7].xyz, TEMP[1].xyzz >127: MUL TEMP[7].xyz, IMM[5].wwww, TEMP[7].xyzz >128: RCP TEMP[9].x, TEMP[0].xxxx >129: RCP TEMP[9].y, TEMP[0].yyyy >130: RCP TEMP[9].z, TEMP[0].zzzz >131: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[9].xyzz >132: UCMP TEMP[2].xyz, TEMP[6].xyzz, TEMP[7].xyzz, TEMP[0].xyzz >133: MOV OUT[5], IN[2] >134: MOV OUT[4], TEMP[2] >135: MOV OUT[3], TEMP[8] >136: MOV OUT[2], TEMP[5] >137: MOV OUT[1], TEMP[4] >138: MOV OUT[0], TEMP[3] >139: END >radeonsi: Compiling shader 372 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0 > %18 = call float @llvm.SI.load.const(<16 x i8> %17, i32 256) > %19 = call float @llvm.SI.load.const(<16 x i8> %17, i32 260) > %20 = call float @llvm.SI.load.const(<16 x i8> %17, i32 264) > %21 = call float @llvm.SI.load.const(<16 x i8> %17, i32 268) > %22 = call float @llvm.SI.load.const(<16 x i8> %17, i32 272) > %23 = call float @llvm.SI.load.const(<16 x i8> %17, i32 276) > %24 = call float @llvm.SI.load.const(<16 x i8> %17, i32 280) > %25 = call float @llvm.SI.load.const(<16 x i8> %17, i32 284) > %26 = call float @llvm.SI.load.const(<16 x i8> %17, i32 288) > %27 = call float @llvm.SI.load.const(<16 x i8> %17, i32 292) > %28 = call float @llvm.SI.load.const(<16 x i8> %17, 
i32 296) > %29 = call float @llvm.SI.load.const(<16 x i8> %17, i32 300) > %30 = call float @llvm.SI.load.const(<16 x i8> %17, i32 304) > %31 = call float @llvm.SI.load.const(<16 x i8> %17, i32 308) > %32 = call float @llvm.SI.load.const(<16 x i8> %17, i32 312) > %33 = call float @llvm.SI.load.const(<16 x i8> %17, i32 316) > %34 = call float @llvm.SI.load.const(<16 x i8> %17, i32 320) > %35 = call float @llvm.SI.load.const(<16 x i8> %17, i32 324) > %36 = call float @llvm.SI.load.const(<16 x i8> %17, i32 328) > %37 = call float @llvm.SI.load.const(<16 x i8> %17, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %17, i32 340) > %39 = call float @llvm.SI.load.const(<16 x i8> %17, i32 344) > %40 = call float @llvm.SI.load.const(<16 x i8> %17, i32 348) > %41 = call float @llvm.SI.load.const(<16 x i8> %17, i32 352) > %42 = call float @llvm.SI.load.const(<16 x i8> %17, i32 356) > %43 = call float @llvm.SI.load.const(<16 x i8> %17, i32 360) > %44 = call float @llvm.SI.load.const(<16 x i8> %17, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %17, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %17, i32 376) > %47 = call float @llvm.SI.load.const(<16 x i8> %17, i32 380) > %48 = call float @llvm.SI.load.const(<16 x i8> %17, i32 384) > %49 = call float @llvm.SI.load.const(<16 x i8> %17, i32 388) > %50 = call float @llvm.SI.load.const(<16 x i8> %17, i32 392) > %51 = call float @llvm.SI.load.const(<16 x i8> %17, i32 396) > %52 = call float @llvm.SI.load.const(<16 x i8> %17, i32 400) > %53 = call float @llvm.SI.load.const(<16 x i8> %17, i32 404) > %54 = call float @llvm.SI.load.const(<16 x i8> %17, i32 416) > %55 = call float @llvm.SI.load.const(<16 x i8> %17, i32 420) > %56 = call float @llvm.SI.load.const(<16 x i8> %17, i32 424) > %57 = call float @llvm.SI.load.const(<16 x i8> %17, i32 428) > %58 = call float @llvm.SI.load.const(<16 x i8> %17, i32 440) > %59 = call float @llvm.SI.load.const(<16 x i8> %17, i32 444) > %60 = call float @llvm.SI.load.const(<16 x i8> %17, i32 448) > %61 = call float @llvm.SI.load.const(<16 x i8> %17, i32 452) > %62 = call float @llvm.SI.load.const(<16 x i8> %17, i32 456) > %63 = call float @llvm.SI.load.const(<16 x i8> %17, i32 460) > %64 = call float @llvm.SI.load.const(<16 x i8> %17, i32 464) > %65 = call float @llvm.SI.load.const(<16 x i8> %17, i32 468) > %66 = call float @llvm.SI.load.const(<16 x i8> %17, i32 472) > %67 = call float @llvm.SI.load.const(<16 x i8> %17, i32 480) > %68 = call float @llvm.SI.load.const(<16 x i8> %17, i32 484) > %69 = call float @llvm.SI.load.const(<16 x i8> %17, i32 488) > %70 = call float @llvm.SI.load.const(<16 x i8> %17, i32 492) > %71 = call float @llvm.SI.load.const(<16 x i8> %17, i32 496) > %72 = call float @llvm.SI.load.const(<16 x i8> %17, i32 500) > %73 = call float @llvm.SI.load.const(<16 x i8> %17, i32 504) > %74 = call float @llvm.SI.load.const(<16 x i8> %17, i32 512) > %75 = call float @llvm.SI.load.const(<16 x i8> %17, i32 516) > %76 = call float @llvm.SI.load.const(<16 x i8> %17, i32 520) > %77 = call float @llvm.SI.load.const(<16 x i8> %17, i32 528) > %78 = call float @llvm.SI.load.const(<16 x i8> %17, i32 532) > %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 > %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %13) > %82 = extractelement <4 x float> %81, i32 0 > %83 = extractelement <4 x float> %81, i32 1 > %84 = extractelement <4 x float> %81, i32 2 > %85 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 > %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %14) > %88 = extractelement <4 x float> %87, i32 0 > %89 = extractelement <4 x float> %87, i32 1 > %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 > %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %15) > %93 = extractelement <4 x float> %92, i32 0 > %94 = extractelement <4 x float> %92, i32 1 > %95 = extractelement <4 x float> %92, i32 2 > %96 = extractelement <4 x float> %92, i32 3 > %97 = fmul float %18, %82 > %98 = fmul float %19, %83 > %99 = fadd float %97, %98 > %100 = fmul float %20, %84 > %101 = fadd float %99, %100 > %102 = fadd float %101, %21 > %103 = fmul float %22, %82 > %104 = fmul float %23, %83 > %105 = fadd float %103, %104 > %106 = fmul float %24, %84 > %107 = fadd float %105, %106 > %108 = fadd float %107, %25 > %109 = fmul float %26, %82 > %110 = fmul float %27, %83 > %111 = fadd float %109, %110 > %112 = fmul float %28, %84 > %113 = fadd float %111, %112 > %114 = fadd float %113, %29 > %115 = fmul float %30, %82 > %116 = fmul float %31, %83 > %117 = fadd float %115, %116 > %118 = fmul float %32, %84 > %119 = fadd float %117, %118 > %120 = fadd float %119, %33 > %121 = fmul float %37, %82 > %122 = fmul float %38, %83 > %123 = fadd float %121, %122 > %124 = fmul float %39, %84 > %125 = fadd float %123, %124 > %126 = fadd float %125, %40 > %127 = fadd float %126, %59 > %128 = fsub float -0.000000e+00, %78 > %129 = fmul float %120, %77 > %130 = fmul float %120, %78 > %131 = call float @llvm.fma.f32(float %102, float %77, float %129) > %132 = call float @llvm.fma.f32(float %108, float %128, float %130) > %133 = fsub float %74, %82 > %134 = fsub float %75, %83 > %135 = fsub float %76, %84 > %136 = fmul float %67, %71 > %137 = fmul float %68, %72 > %138 = fmul float %69, %73 > %139 = fmul float %34, %133 > %140 = fmul float %35, %134 > %141 = fadd float %140, %139 > %142 = fmul float %36, %135 > %143 = fadd float %141, %142 > %144 = fmul float %41, %133 > %145 = fmul float %42, %134 > %146 = fadd float %145, %144 > %147 = fmul float %43, %135 > %148 = fadd float %146, %147 > %149 = fmul float %37, %133 > %150 = fmul float %38, %134 > %151 = fadd float %150, %149 > %152 = fmul float %39, %135 > %153 = fadd float %151, %152 > %154 = fmul float %143, %143 > %155 = fmul float %153, %153 > %156 = fadd float %155, %154 > %157 = fmul float %148, %148 > %158 = fadd float %156, %157 > %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) > %160 = fmul float %159, %143 > %161 = fmul float %159, %153 > %162 = fmul float %159, %148 > %163 = fsub float -0.000000e+00, %153 > %164 = call float @llvm.fma.f32(float %163, float %159, float 0xBFC3333340000000) > %165 = fsub float 1.000000e+00, %164 > %166 = call float @llvm.AMDGPU.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) > %167 = fmul float %166, %166 > %168 = fmul float %160, %64 > %169 = fsub float -0.000000e+00, %168 > %170 = fmul float %161, %65 > %171 = fsub float %169, %170 > %172 = fmul float %162, %66 > %173 = fsub float %171, %172 > %174 = fsub float -0.000000e+00, %49 > %175 = call float @llvm.fma.f32(float %174, float %173, float %48) > %176 = call float @llvm.fma.f32(float %173, float %173, float 1.000000e+00) > %177 = call float @llvm.fabs.f32(float %175) > 
%178 = call float @llvm.log2.f32(float %177) > %179 = fmul float %176, 0x3FAE8EC8A0000000 > %180 = fmul float %178, -1.500000e+00 > %181 = call float @llvm.exp2.f32(float %180) > %182 = fsub float -0.000000e+00, %46 > %183 = call float @llvm.fma.f32(float %50, float %181, float %182) > %184 = fmul float %181, %50 > %185 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) > %186 = call float @llvm.fabs.f32(float %120) > %187 = fmul float %186, 0x3EF4F8B580000000 > %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) > %189 = fsub float 1.000000e+00, %188 > %190 = fsub float -0.000000e+00, %185 > %191 = call float @llvm.fma.f32(float %190, float %189, float %184) > %192 = call float @llvm.maxnum.f32(float %191, float %63) > %193 = fcmp une float %44, 0.000000e+00 > br i1 %193, label %IF, label %ELSE > >IF: ; preds = %main_body > %194 = fdiv float 1.000000e+00, %44 > %195 = fmul float %127, %194 > %196 = fsub float -0.000000e+00, %195 > br label %ENDIF > >ELSE: ; preds = %main_body > %197 = fsub float -0.000000e+00, %127 > %198 = fcmp olt float %127, -0.000000e+00 > %199 = select i1 %198, float 1.000000e+00, float %197 > %200 = fcmp oge float %199, 0.000000e+00 > %.op = fmul float %199, 0x4600000000000000 > %201 = select i1 %200, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp44.0 = phi float [ %196, %IF ], [ %201, %ELSE ] > %202 = fmul float %temp44.0, 0x3FF7154760000000 > %203 = call float @llvm.exp2.f32(float %202) > %204 = fadd float %203, %51 > %205 = fmul float %204, %53 > %206 = fmul float %205, 5.000000e-01 > %207 = fmul float %167, %206 > %208 = call float @llvm.minnum.f32(float %207, float %47) > %209 = call float @llvm.maxnum.f32(float %208, float %52) > %210 = fmul float %209, %192 > %211 = fcmp une float %57, 0.000000e+00 > br i1 %211, label %IF57, label %ELSE58 > >IF57: ; preds = %ENDIF > %212 = fdiv float 1.000000e+00, %57 > %213 = fmul float %127, %212 > %214 = fsub float -0.000000e+00, %213 > br label %ENDIF56 > >ELSE58: ; preds = %ENDIF > %215 = fsub float -0.000000e+00, %127 > %216 = fcmp olt float %127, -0.000000e+00 > %217 = select i1 %216, float 1.000000e+00, float %215 > %218 = fcmp oge float %217, 0.000000e+00 > %.op62 = fmul float %217, 0x4600000000000000 > %219 = select i1 %218, float %.op62, float 0xC600000000000000 > br label %ENDIF56 > >ENDIF56: ; preds = %ELSE58, %IF57 > %temp44.1 = phi float [ %214, %IF57 ], [ %219, %ELSE58 ] > %220 = fsub float %58, %127 > %221 = fcmp une float %45, 0.000000e+00 > br i1 %221, label %IF60, label %ELSE61 > >IF60: ; preds = %ENDIF56 > %222 = fdiv float 1.000000e+00, %45 > %223 = fmul float %220, %222 > br label %ENDIF59 > >ELSE61: ; preds = %ENDIF56 > %224 = fcmp ogt float %220, 0.000000e+00 > %225 = select i1 %224, float 1.000000e+00, float %220 > %226 = fcmp oge float %225, 0.000000e+00 > %.op63 = fmul float %225, 0x4600000000000000 > %227 = select i1 %226, float %.op63, float 0xC600000000000000 > br label %ENDIF59 > >ENDIF59: ; preds = %ELSE61, %IF60 > %temp48.0 = phi float [ %223, %IF60 ], [ %227, %ELSE61 ] > %228 = fmul float %temp44.1, 0x3FF7154760000000 > %229 = call float @llvm.exp2.f32(float %228) > %230 = fmul float %229, %54 > %231 = fmul float %229, %55 > %232 = fmul float %229, %56 > %233 = call float @llvm.fma.f32(float %54, float %229, float %209) > %234 = call float @llvm.fma.f32(float %55, float %229, float %209) > %235 = call float @llvm.fma.f32(float %56, float %229, float %209) > %236 = call float @llvm.fma.f32(float %230, float %179, float 
%210) > %237 = call float @llvm.fma.f32(float %231, float %179, float %210) > %238 = call float @llvm.fma.f32(float %232, float %179, float %210) > %239 = fcmp oeq float %233, 0.000000e+00 > %240 = fcmp oeq float %234, 0.000000e+00 > %241 = fcmp oeq float %235, 0.000000e+00 > %242 = fcmp ogt float %236, 0.000000e+00 > %243 = select i1 %242, float 1.000000e+00, float %236 > %244 = fcmp oge float %243, 0.000000e+00 > %245 = fcmp ogt float %237, 0.000000e+00 > %246 = select i1 %245, float 1.000000e+00, float %237 > %247 = fcmp oge float %246, 0.000000e+00 > %248 = fcmp ogt float %238, 0.000000e+00 > %249 = select i1 %248, float 1.000000e+00, float %238 > %250 = fcmp oge float %249, 0.000000e+00 > %.op64 = fmul float %243, 0x4600000000000000 > %251 = select i1 %244, float %.op64, float 0xC600000000000000 > %.op65 = fmul float %246, 0x4600000000000000 > %252 = select i1 %247, float %.op65, float 0xC600000000000000 > %.op66 = fmul float %249, 0x4600000000000000 > %253 = select i1 %250, float %.op66, float 0xC600000000000000 > %254 = fdiv float 1.000000e+00, %233 > %255 = fdiv float 1.000000e+00, %234 > %256 = fdiv float 1.000000e+00, %235 > %257 = fmul float %236, %254 > %258 = fmul float %237, %255 > %259 = fmul float %238, %256 > %260 = select i1 %239, float %251, float %257 > %261 = select i1 %240, float %252, float %258 > %262 = select i1 %241, float %253, float %259 > %263 = fmul float %233, %temp48.0 > %264 = fmul float %234, %temp48.0 > %265 = fmul float %235, %temp48.0 > %266 = call float @llvm.fabs.f32(float %120) > %267 = call float @llvm.fabs.f32(float %120) > %268 = call float @llvm.fabs.f32(float %120) > %269 = fmul float %233, %266 > %270 = fmul float %234, %267 > %271 = fmul float %235, %268 > %272 = fmul float %269, 0xBFF7154760000000 > %273 = fmul float %270, 0xBFF7154760000000 > %274 = fmul float %271, 0xBFF7154760000000 > %275 = call float @llvm.exp2.f32(float %272) > %276 = call float @llvm.exp2.f32(float %273) > %277 = call float @llvm.exp2.f32(float %274) > %278 = fmul float %263, 0xBFF7154760000000 > %279 = fmul float %264, 0xBFF7154760000000 > %280 = fmul float %265, 0xBFF7154760000000 > %281 = call float @llvm.log2.f32(float %60) > %282 = call float @llvm.log2.f32(float %61) > %283 = call float @llvm.log2.f32(float %62) > %284 = fmul float %281, 0x3FDD1745E0000000 > %285 = fmul float %282, 0x3FDD1745E0000000 > %286 = fmul float %283, 0x3FDD1745E0000000 > %287 = call float @llvm.exp2.f32(float %284) > %288 = call float @llvm.exp2.f32(float %285) > %289 = call float @llvm.exp2.f32(float %286) > %290 = call float @llvm.exp2.f32(float %278) > %291 = call float @llvm.exp2.f32(float %279) > %292 = call float @llvm.exp2.f32(float %280) > %293 = fmul float %290, %287 > %294 = fmul float %291, %288 > %295 = fmul float %292, %289 > %296 = fmul float %260, %293 > %297 = fmul float %261, %294 > %298 = fmul float %262, %295 > %299 = fsub float 1.000000e+00, %275 > %300 = fsub float 1.000000e+00, %276 > %301 = fsub float 1.000000e+00, %277 > %302 = call float @llvm.fma.f32(float %296, float %299, float 0xBF70624DE0000000) > %303 = call float @llvm.fma.f32(float %297, float %300, float 0xBF70624DE0000000) > %304 = call float @llvm.fma.f32(float %298, float %301, float 0xBF70624DE0000000) > %305 = call float @llvm.maxnum.f32(float %302, float 0.000000e+00) > %306 = call float @llvm.maxnum.f32(float %303, float 0.000000e+00) > %307 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) > %308 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 5.000000e-01) > 
%309 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 5.000000e-01) > %310 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 5.000000e-01) > %311 = fmul float %305, %308 > %312 = fmul float %306, %309 > %313 = fmul float %307, %310 > %314 = call float @llvm.fma.f32(float %305, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %315 = call float @llvm.fma.f32(float %306, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %316 = call float @llvm.fma.f32(float %307, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %317 = call float @llvm.fma.f32(float %305, float %314, float 0x3FAEB851E0000000) > %318 = call float @llvm.fma.f32(float %306, float %315, float 0x3FAEB851E0000000) > %319 = call float @llvm.fma.f32(float %307, float %316, float 0x3FAEB851E0000000) > %320 = fcmp oeq float %317, 0.000000e+00 > %321 = fcmp oeq float %318, 0.000000e+00 > %322 = fcmp oeq float %319, 0.000000e+00 > %323 = fcmp ogt float %311, 0.000000e+00 > %324 = select i1 %323, float 1.000000e+00, float %311 > %325 = fcmp oge float %324, 0.000000e+00 > %326 = fcmp ogt float %312, 0.000000e+00 > %327 = select i1 %326, float 1.000000e+00, float %312 > %328 = fcmp oge float %327, 0.000000e+00 > %329 = fcmp ogt float %313, 0.000000e+00 > %330 = select i1 %329, float 1.000000e+00, float %313 > %331 = fcmp oge float %330, 0.000000e+00 > %.op67 = fmul float %324, 0x4600000000000000 > %332 = select i1 %325, float %.op67, float 0xC600000000000000 > %.op68 = fmul float %327, 0x4600000000000000 > %333 = select i1 %328, float %.op68, float 0xC600000000000000 > %.op69 = fmul float %330, 0x4600000000000000 > %334 = select i1 %331, float %.op69, float 0xC600000000000000 > %335 = fdiv float 1.000000e+00, %317 > %336 = fdiv float 1.000000e+00, %318 > %337 = fdiv float 1.000000e+00, %319 > %338 = fmul float %311, %335 > %339 = fmul float %312, %336 > %340 = fmul float %313, %337 > %341 = select i1 %320, float %332, float %338 > %342 = select i1 %321, float %333, float %339 > %343 = select i1 %322, float %334, float %340 > %344 = bitcast i32 %11 to float > %345 = insertvalue <{ float, float, float }> undef, float %344, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %132, float %114, float %120) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %136, float %137, float %138, float %70) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %341, float %342, float %343, float %275) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %93, float %94, float %95, float %96) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %108, float %114, float %120) > ret <{ float, float, float }> %345 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float 
@llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], BUFFER, FLOAT >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 0.0667, 1.0000} >IMM[1] INT32 {0, 0, 0, 0} >IMM[2] UINT32 {0, 0, 0, 0} > 0: FSEQ TEMP[0].xy, IN[1].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[1].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[1].wwww > 4: MUL TEMP[2].xy, IN[1].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D > 8: ADD TEMP[0].x, -TEMP[1].xxxx, IN[1].wwww > 9: ABS TEMP[1].x, TEMP[0].xxxx > 10: MUL TEMP[0].x, TEMP[1].xxxx, IMM[0].zzzz > 11: MOV TEMP[1].xy, IN[0].xyyy > 12: TEX TEMP[1], TEMP[1], SAMP[1], 2D > 13: MOV TEMP[2].xyz, TEMP[1].xyzx > 14: MIN TEMP[3].x, TEMP[0].xxxx, IMM[0].wwww > 15: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].wwww > 16: MOV TEMP[2].w, TEMP[1].xxxx > 17: MUL TEMP[0], TEMP[2], IN[4] > 18: MUL TEMP[0], TEMP[0], IN[2] > 19: FMA TEMP[1].xyz, TEMP[0].xyzz, IN[3].wwww, IN[3].xyzz > 20: MUL TEMP[0].x, TEMP[0].wwww, IN[3].wwww > 21: MOV TEMP[0].w, TEMP[0].xxxx > 22: MOV TEMP[2].x, IMM[1].xxxx > 23: MOV TEMP[2].w, IMM[2].xxxx > 24: TXF TEMP[2].x, TEMP[2], SAMP[2], BUFFER > 25: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz > 26: MOV OUT[0], TEMP[0] > 27: END >radeonsi: Compiling shader 373 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 > %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3 > %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !tbaa !0 > %28 = extractelement <8 x i32> %24, i32 7 > %29 = extractelement <4 x i32> %27, i32 0 > %30 = and i32 %29, %28 > %31 = insertelement <4 x i32> %27, i32 %30, i32 0 > %32 = getelementptr [32 x 
<8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !tbaa !0 > %34 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 7 > %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !tbaa !0 > %37 = extractelement <8 x i32> %33, i32 7 > %38 = extractelement <4 x i32> %36, i32 0 > %39 = and i32 %38, %37 > %40 = insertelement <4 x i32> %36, i32 %39, i32 0 > %41 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %42 = bitcast <8 x i32> addrspace(2)* %41 to <2 x i128> addrspace(2)* > %43 = load <2 x i128>, <2 x i128> addrspace(2)* %42, align 32, !tbaa !0 > %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %51 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %52 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %53 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %54 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %55 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %56 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %57 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) > %61 = fcmp oeq float %48, 0.000000e+00 > %62 = fcmp oeq float %48, 0.000000e+00 > %63 = fcmp ogt float %46, 0.000000e+00 > %64 = select i1 %63, float 1.000000e+00, float %46 > %65 = fcmp oge float %64, 0.000000e+00 > %66 = fcmp ogt float %47, 0.000000e+00 > %67 = select i1 %66, float 1.000000e+00, float %47 > %68 = fcmp oge float %67, 0.000000e+00 > %.op = fmul float %64, 0x4600000000000000 > %69 = select i1 %65, float %.op, float 0xC600000000000000 > %.op16 = fmul float %67, 0x4600000000000000 > %70 = select i1 %68, float %.op16, float 0xC600000000000000 > %71 = fdiv float 1.000000e+00, %48 > %72 = fmul float %46, %71 > %73 = fmul float %47, %71 > %74 = select i1 %61, float %69, float %72 > %75 = select i1 %62, float %70, float %73 > %76 = bitcast float %74 to i32 > %77 = bitcast float %75 to i32 > %78 = insertelement <2 x i32> undef, i32 %76, i32 0 > %79 = insertelement <2 x i32> %78, i32 %77, i32 1 > %80 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %79, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %81 = extractelement <4 x float> %80, i32 0 > %82 = fsub float %48, %81 > %83 = call float @llvm.fabs.f32(float %82) > %84 = fmul float %83, 0x3FB1111120000000 > %85 = bitcast float %44 to i32 > %86 = bitcast float %45 to i32 > %87 = insertelement <2 x i32> undef, i32 %85, i32 0 > %88 = insertelement <2 x i32> %87, i32 %86, i32 1 > %89 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %88, <8 x i32> %33, <4 x i32> %40, i32 15, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0) > %90 = extractelement <4 x float> %89, i32 0 > %91 = extractelement <4 x float> %89, i32 1 > %92 = extractelement <4 x float> %89, i32 2 > %93 = extractelement <4 x float> %89, i32 3 > %94 = call float @llvm.minnum.f32(float %84, float 1.000000e+00) > %95 = fmul float %94, %93 > %96 = fmul float %90, %57 > %97 = fmul float %91, %58 > %98 = fmul float %92, %59 > %99 = fmul float %95, %60 > %100 = fmul float %96, %49 > %101 = fmul float %97, %50 > %102 = fmul float %98, %51 > %103 = fmul float %99, %52 > %104 = call float @llvm.fma.f32(float %100, float %56, float %53) > %105 = call float @llvm.fma.f32(float %101, float %56, float %54) > %106 = call float @llvm.fma.f32(float %102, float %56, float %55) > %107 = fmul float %103, %56 > %108 = extractelement <2 x i128> %43, i32 1 > %109 = bitcast i128 %108 to <16 x i8> > %110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 0) > %111 = extractelement <4 x float> %110, i32 0 > %112 = fmul float %111, %104 > %113 = fmul float %111, %105 > %114 = fmul float %111, %106 > %115 = bitcast float %5 to i32 > %116 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %115, 10 > %117 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %116, float %112, 11 > %118 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %117, float %113, 12 > %119 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %118, float %114, 13 > %120 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %119, float %107, 14 > %121 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %120, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %121 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL IN[6] >DCL IN[7] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], 
GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..38] >DCL CONST[2][0..4095] >DCL TEMP[0..21], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, -1.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 368} >IMM[3] UINT32 {384, 400, 416, 592} >IMM[4] UINT32 {448, 544, 608, 288} >IMM[5] FLT32 { 0.0000, -0.1500, 0.0597, -1.5000} >IMM[6] UINT32 {432, 464, 576, 496} >IMM[7] UINT32 {480, 560, 512, 528} >IMM[8] FLT32 { 0.0000, 158456325028528675187087900672.0000, 1.4427, 0.5000} >IMM[9] FLT32 { 0.4545, -0.0040, 6.2000, 1.7000} >IMM[10] FLT32 { 0.0600, -0.0187, 0.0743, -0.2121} >IMM[11] FLT32 { 1.5707, -2.0000, 3.1416, 0.5000} >IMM[12] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} > 0: MUL TEMP[0].xyz, IN[7].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[6].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[6].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 
64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[6].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[6].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[6].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[6].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[6].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, 
TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[6].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[6].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[6].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[6].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[6].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[6].zzzz, TEMP[17].yyyy >218: MOV 
TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[18].y, TEMP[18].xxxx >224: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[20].xxxx >227: MOV TEMP[19].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[20].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[21].xxxx >231: MOV TEMP[20].z, CONST[2][ADDR[0].x] >232: FMA TEMP[19].x, TEMP[19].xxxx, TEMP[20].zzzz, -TEMP[8].zzzz >233: MUL TEMP[19].x, TEMP[19].xxxx, IN[6].xxxx >234: MUL TEMP[19].x, IMM[0].yyyy, TEMP[19].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[19].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[20].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[20].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[20].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[21].x, TEMP[20].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[21].xxxx >244: MOV TEMP[20].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[20].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[6].xxxx, TEMP[10].zzzz >249: MOV TEMP[19].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[6].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[6].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[6].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].zzzz >259: MOV TEMP[19].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[6].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[19] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[6].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[6].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[6].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[18].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[10].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[6].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[10].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: 
MUL TEMP[6].x, IN[6].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[10].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[6].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[10].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[13].xxxx >315: MOV TEMP[10].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[10].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[6].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >324: UARL ADDR[0].x, TEMP[8].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[10].x, TEMP[8].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[10].xxxx >329: MOV TEMP[8].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[8].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[8].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[8].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[8].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[8].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[6].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[6].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[18].x, TEMP[0], TEMP[5] >354: MOV TEMP[18].w, IMM[0].zzzz >355: DP4 TEMP[1].x, CONST[1][23], TEMP[18] >356: DP4 TEMP[3].x, CONST[1][24], TEMP[18] >357: MOV TEMP[1].y, TEMP[3].xxxx >358: DP4 TEMP[3].x, CONST[1][25], TEMP[18] >359: MOV TEMP[1].z, TEMP[3].xxxx >360: DP4 TEMP[3].x, CONST[1][26], TEMP[18] >361: MOV TEMP[1].w, TEMP[3].xxxx >362: ADD TEMP[2].xyz, -TEMP[18].xyzz, CONST[1][37].xyzz >363: MOV TEMP[6], TEMP[1] >364: MOV TEMP[8].zw, TEMP[1].wwzw >365: MUL TEMP[5].xy, TEMP[3].xxxx, CONST[1][38].xyyy >366: MUL TEMP[3].xy, CONST[1][38].xyyy, IMM[0].zwww >367: FMA TEMP[8].xy, TEMP[1].xyyy, TEMP[3].xyyy, TEMP[5].xyyy >368: FSNE TEMP[3].x, CONST[1][30].xxxx, IMM[8].xxxx >369: UIF TEMP[3].xxxx :0 >370: ELSE :0 >371: ENDIF >372: FSNE TEMP[3].x, CONST[1][33].wwww, IMM[8].xxxx >373: UIF TEMP[3].xxxx :0 >374: ELSE :0 >375: ENDIF >376: FSNE TEMP[3].x, CONST[1][30].yyyy, IMM[8].xxxx >377: UIF TEMP[3].xxxx :0 >378: ELSE :0 >379: ENDIF >380: ABS TEMP[3].x, IN[3].yyyy >381: FMA TEMP[3].x, TEMP[3].xxxx, IMM[10].yyyy, IMM[10].zzzz >382: ABS TEMP[5].x, IN[3].yyyy >383: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx, IMM[10].wwww >384: ABS TEMP[5].x, IN[3].yyyy >385: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx, IMM[11].xxxx 
>386: ABS TEMP[5].x, IN[3].yyyy >387: ADD TEMP[1].x, -TEMP[5].xxxx, IMM[0].zzzz >388: SQRT TEMP[5].x, TEMP[1].xxxx >389: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[5].xxxx >390: FMA TEMP[9].x, TEMP[9].xxxx, IMM[11].yyyy, IMM[11].zzzz >391: FSLT TEMP[10].x, IN[3].yyyy, -IN[3].yyyy >392: AND TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx >393: INEG TEMP[10].x, TEMP[10].xxxx >394: AND TEMP[9].x, TEMP[10].xxxx, TEMP[9].xxxx >395: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx, TEMP[9].xxxx >396: SIN TEMP[3].xy, TEMP[3].xxxx >397: MOV TEMP[1].yz, TEMP[3].yxyy >398: MOV TEMP[1].x, IN[3].yyyy >399: MUL TEMP[3].xyz, TEMP[1].xyzz, IMM[12].xxyy >400: DP3 TEMP[5].x, TEMP[4].xyzz, IN[3].xyzz >401: MOV TEMP[1].y, TEMP[5].xxxx >402: DP3 TEMP[5].x, TEMP[7].xyzz, IN[3].xyzz >403: MOV TEMP[1].z, TEMP[5].xxxx >404: DP3 TEMP[1].x, TEMP[0].xyzz, IN[3].xyzz >405: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[2].xyzz >406: DP3 TEMP[9].x, TEMP[4].xyzz, IN[4].xyzz >407: MOV TEMP[1].y, TEMP[9].xxxx >408: DP3 TEMP[9].x, TEMP[4].xyzz, IN[1].xyzz >409: MOV TEMP[4].y, TEMP[9].xxxx >410: DP3 TEMP[9].x, TEMP[7].xyzz, IN[4].xyzz >411: MOV TEMP[1].z, TEMP[9].xxxx >412: DP3 TEMP[7].x, TEMP[7].xyzz, IN[1].xyzz >413: MOV TEMP[4].z, TEMP[7].xxxx >414: DP3 TEMP[1].x, TEMP[0].xyzz, IN[4].xyzz >415: DP3 TEMP[4].x, TEMP[0].xyzz, IN[1].xyzz >416: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[2].xyzz >417: MOV TEMP[5].z, TEMP[0].xxxx >418: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[2].xyzz >419: MOV TEMP[5].y, TEMP[0].xxxx >420: MOV OUT[5], TEMP[5] >421: MOV OUT[4], TEMP[3] >422: MOV OUT[3], IN[5] >423: MOV OUT[1], IN[2].xyxy >424: MOV OUT[2], TEMP[8] >425: MOV OUT[0], TEMP[6] >426: END >radeonsi: Compiling shader 374 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 > %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) > %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) > %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376) > %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 380) > %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384) > %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 388) > %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 392) > %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 396) > %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 400) > %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 404) > %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 408) > %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 412) > %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 416) > %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 420) > %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 424) > %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 428) > %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 592) > %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 596) > %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 600) > %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 
608) > %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 612) > %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 > %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 > %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %13) > %49 = extractelement <4 x float> %48, i32 0 > %50 = extractelement <4 x float> %48, i32 1 > %51 = extractelement <4 x float> %48, i32 2 > %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 > %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %14) > %55 = extractelement <4 x float> %54, i32 0 > %56 = extractelement <4 x float> %54, i32 1 > %57 = extractelement <4 x float> %54, i32 2 > %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 > %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %15) > %61 = extractelement <4 x float> %60, i32 0 > %62 = extractelement <4 x float> %60, i32 1 > %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 > %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %16) > %66 = extractelement <4 x float> %65, i32 0 > %67 = extractelement <4 x float> %65, i32 1 > %68 = extractelement <4 x float> %65, i32 2 > %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 > %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %17) > %72 = extractelement <4 x float> %71, i32 0 > %73 = extractelement <4 x float> %71, i32 1 > %74 = extractelement <4 x float> %71, i32 2 > %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 > %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %18) > %78 = extractelement <4 x float> %77, i32 0 > %79 = extractelement <4 x float> %77, i32 1 > %80 = extractelement <4 x float> %77, i32 2 > %81 = extractelement <4 x float> %77, i32 3 > %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6 > %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 > %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %19) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 7 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %20) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = extractelement <4 x float> %90, i32 2 > %94 = fmul float %93, 0x406FE01000000000 > %95 = fmul float %92, 0x406FE01000000000 > %96 = fmul float %91, 0x406FE01000000000 > %97 = fptosi float %94 to i32 > %98 = fptosi float %95 to i32 > %99 = fptosi float %96 to i32 > %100 = shl i32 %97, 1 > %101 = or i32 %100, 1 > %102 = shl i32 %98, 1 > %103 = or 
i32 %102, 1 > %104 = shl i32 %99, 1 > %105 = or i32 %104, 1 > %106 = shl i32 %97, 5 > %107 = or i32 %106, 4 > %108 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %107) > %109 = fmul float %85, %108 > %110 = shl i32 %98, 5 > %111 = or i32 %110, 4 > %112 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %111) > %113 = fmul float %86, %112 > %114 = shl i32 %101, 4 > %115 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %114) > %116 = shl i32 %101, 4 > %117 = or i32 %116, 12 > %118 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %117) > %119 = fmul float %115, %118 > %120 = shl i32 %101, 4 > %121 = or i32 %120, 4 > %122 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %121) > %123 = shl i32 %101, 4 > %124 = or i32 %123, 8 > %125 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %124) > %126 = fsub float -0.000000e+00, %119 > %127 = call float @llvm.fma.f32(float %122, float %125, float %126) > %128 = shl i32 %101, 4 > %129 = or i32 %128, 4 > %130 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %129) > %131 = shl i32 %101, 4 > %132 = or i32 %131, 8 > %133 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %132) > %134 = call float @llvm.fma.f32(float %130, float %133, float %119) > %135 = fmul float %134, %85 > %136 = fmul float %127, %85 > %137 = fmul float %136, 2.000000e+00 > %138 = shl i32 %103, 4 > %139 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %138) > %140 = shl i32 %103, 4 > %141 = or i32 %140, 12 > %142 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %141) > %143 = fmul float %139, %142 > %144 = shl i32 %103, 4 > %145 = or i32 %144, 4 > %146 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %145) > %147 = shl i32 %103, 4 > %148 = or i32 %147, 8 > %149 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %148) > %150 = fsub float -0.000000e+00, %143 > %151 = call float @llvm.fma.f32(float %146, float %149, float %150) > %152 = shl i32 %103, 4 > %153 = or i32 %152, 4 > %154 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %153) > %155 = shl i32 %103, 4 > %156 = or i32 %155, 8 > %157 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %156) > %158 = call float @llvm.fma.f32(float %154, float %157, float %143) > %159 = fmul float %158, %86 > %160 = fmul float %159, 2.000000e+00 > %161 = fmul float %151, %86 > %162 = fmul float %161, 2.000000e+00 > %163 = shl i32 %101, 4 > %164 = or i32 %163, 4 > %165 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %164) > %166 = shl i32 %101, 4 > %167 = or i32 %166, 8 > %168 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %167) > %169 = shl i32 %101, 4 > %170 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %169) > %171 = shl i32 %101, 4 > %172 = or i32 %171, 12 > %173 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %172) > %174 = fmul float %168, %173 > %175 = fmul float %168, %170 > %176 = fmul float %165, %173 > %177 = shl i32 %101, 4 > %178 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %177) > %179 = shl i32 %101, 4 > %180 = or i32 %179, 4 > %181 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %180) > %182 = call float @llvm.fma.f32(float %178, float %181, float %174) > %183 = fmul float %182, %85 > %184 = fmul float %183, 2.000000e+00 > %185 = shl i32 %101, 4 > %186 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %185) > %187 = shl i32 %101, 4 > %188 = or i32 %187, 4 > %189 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %188) > %190 = shl i32 %101, 4 > %191 = or i32 %190, 8 > %192 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %191) > %193 = shl i32 %101, 4 > %194 = call 
float @llvm.SI.load.const(<16 x i8> %45, i32 %193) > %195 = shl i32 %101, 4 > %196 = or i32 %195, 4 > %197 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %196) > %198 = shl i32 %101, 4 > %199 = or i32 %198, 8 > %200 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %199) > %201 = fmul float %186, %194 > %202 = fmul float %189, %197 > %203 = fmul float %192, %200 > %204 = fadd float %203, %202 > %205 = fadd float %203, %201 > %206 = fadd float %202, %201 > %207 = fsub float -0.000000e+00, %204 > %208 = call float @llvm.fma.f32(float %207, float 2.000000e+00, float 1.000000e+00) > %209 = fsub float -0.000000e+00, %205 > %210 = call float @llvm.fma.f32(float %209, float 2.000000e+00, float 1.000000e+00) > %211 = fsub float -0.000000e+00, %206 > %212 = call float @llvm.fma.f32(float %211, float 2.000000e+00, float 1.000000e+00) > %213 = fmul float %85, %210 > %214 = shl i32 %103, 4 > %215 = or i32 %214, 4 > %216 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %215) > %217 = shl i32 %103, 4 > %218 = or i32 %217, 8 > %219 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %218) > %220 = shl i32 %103, 4 > %221 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %220) > %222 = shl i32 %103, 4 > %223 = or i32 %222, 12 > %224 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %223) > %225 = fmul float %219, %224 > %226 = fmul float %219, %221 > %227 = fmul float %216, %224 > %228 = shl i32 %103, 4 > %229 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %228) > %230 = shl i32 %103, 4 > %231 = or i32 %230, 4 > %232 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %231) > %233 = call float @llvm.fma.f32(float %229, float %232, float %225) > %234 = fmul float %233, %86 > %235 = fmul float %234, 2.000000e+00 > %236 = shl i32 %103, 4 > %237 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %236) > %238 = shl i32 %103, 4 > %239 = or i32 %238, 4 > %240 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %239) > %241 = shl i32 %103, 4 > %242 = or i32 %241, 8 > %243 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %242) > %244 = shl i32 %103, 4 > %245 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %244) > %246 = shl i32 %103, 4 > %247 = or i32 %246, 4 > %248 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %247) > %249 = shl i32 %103, 4 > %250 = or i32 %249, 8 > %251 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %250) > %252 = fmul float %237, %245 > %253 = fmul float %240, %248 > %254 = fmul float %243, %251 > %255 = fadd float %254, %253 > %256 = fadd float %254, %252 > %257 = fadd float %253, %252 > %258 = fsub float -0.000000e+00, %255 > %259 = call float @llvm.fma.f32(float %258, float 2.000000e+00, float 1.000000e+00) > %260 = fsub float -0.000000e+00, %256 > %261 = call float @llvm.fma.f32(float %260, float 2.000000e+00, float 1.000000e+00) > %262 = fsub float -0.000000e+00, %257 > %263 = call float @llvm.fma.f32(float %262, float 2.000000e+00, float 1.000000e+00) > %264 = fmul float %86, %261 > %265 = fadd float %184, %235 > %266 = fadd float %213, %264 > %267 = fadd float %137, %162 > %268 = fadd float %109, %113 > %269 = shl i32 %99, 5 > %270 = or i32 %269, 4 > %271 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %270) > %272 = fmul float %87, %271 > %273 = shl i32 %105, 4 > %274 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %273) > %275 = shl i32 %105, 4 > %276 = or i32 %275, 12 > %277 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %276) > %278 = fmul float %274, %277 > %279 = shl i32 %105, 4 > %280 = or i32 %279, 4 > %281 = call float 
@llvm.SI.load.const(<16 x i8> %45, i32 %280) > %282 = shl i32 %105, 4 > %283 = or i32 %282, 8 > %284 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %283) > %285 = fsub float -0.000000e+00, %278 > %286 = call float @llvm.fma.f32(float %281, float %284, float %285) > %287 = shl i32 %105, 4 > %288 = or i32 %287, 4 > %289 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %288) > %290 = shl i32 %105, 4 > %291 = or i32 %290, 8 > %292 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %291) > %293 = call float @llvm.fma.f32(float %289, float %292, float %278) > %294 = fmul float %293, %87 > %295 = fmul float %294, 2.000000e+00 > %296 = fmul float %286, %87 > %297 = fmul float %296, 2.000000e+00 > %298 = shl i32 %105, 4 > %299 = or i32 %298, 4 > %300 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %299) > %301 = shl i32 %105, 4 > %302 = or i32 %301, 8 > %303 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %302) > %304 = shl i32 %105, 4 > %305 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %304) > %306 = shl i32 %105, 4 > %307 = or i32 %306, 12 > %308 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %307) > %309 = fmul float %303, %308 > %310 = fmul float %303, %305 > %311 = fmul float %300, %308 > %312 = shl i32 %105, 4 > %313 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %312) > %314 = shl i32 %105, 4 > %315 = or i32 %314, 4 > %316 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %315) > %317 = call float @llvm.fma.f32(float %313, float %316, float %309) > %318 = fmul float %317, %87 > %319 = fmul float %318, 2.000000e+00 > %320 = shl i32 %105, 4 > %321 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %320) > %322 = shl i32 %105, 4 > %323 = or i32 %322, 4 > %324 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %323) > %325 = shl i32 %105, 4 > %326 = or i32 %325, 8 > %327 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %326) > %328 = shl i32 %105, 4 > %329 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %328) > %330 = shl i32 %105, 4 > %331 = or i32 %330, 4 > %332 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %331) > %333 = shl i32 %105, 4 > %334 = or i32 %333, 8 > %335 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %334) > %336 = fmul float %321, %329 > %337 = fmul float %324, %332 > %338 = fmul float %327, %335 > %339 = fadd float %338, %337 > %340 = fadd float %338, %336 > %341 = fadd float %337, %336 > %342 = fsub float -0.000000e+00, %339 > %343 = call float @llvm.fma.f32(float %342, float 2.000000e+00, float 1.000000e+00) > %344 = fsub float -0.000000e+00, %340 > %345 = call float @llvm.fma.f32(float %344, float 2.000000e+00, float 1.000000e+00) > %346 = fsub float -0.000000e+00, %341 > %347 = call float @llvm.fma.f32(float %346, float 2.000000e+00, float 1.000000e+00) > %348 = fmul float %87, %345 > %349 = fadd float %265, %319 > %350 = fadd float %266, %348 > %351 = fadd float %267, %297 > %352 = fadd float %268, %272 > %353 = fmul float %349, %49 > %354 = fmul float %350, %50 > %355 = fadd float %353, %354 > %356 = fmul float %351, %51 > %357 = fadd float %355, %356 > %358 = fadd float %357, %352 > %359 = shl i32 %101, 4 > %360 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %359) > %361 = shl i32 %101, 4 > %362 = or i32 %361, 8 > %363 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %362) > %364 = fsub float -0.000000e+00, %176 > %365 = call float @llvm.fma.f32(float %360, float %363, float %364) > %366 = fmul float %365, %85 > %367 = fmul float %366, 2.000000e+00 > %368 = fmul float %135, 2.000000e+00 > %369 = shl i32 
%103, 4 > %370 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %369) > %371 = shl i32 %103, 4 > %372 = or i32 %371, 8 > %373 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %372) > %374 = fsub float -0.000000e+00, %227 > %375 = call float @llvm.fma.f32(float %370, float %373, float %374) > %376 = fmul float %375, %86 > %377 = fmul float %376, 2.000000e+00 > %378 = fmul float %85, %212 > %379 = fmul float %85, %208 > %380 = fmul float %86, %263 > %381 = fmul float %86, %259 > %382 = shl i32 %97, 5 > %383 = or i32 %382, 8 > %384 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %383) > %385 = fmul float %85, %384 > %386 = shl i32 %98, 5 > %387 = or i32 %386, 8 > %388 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %387) > %389 = fmul float %86, %388 > %390 = fadd float %377, %367 > %391 = fadd float %160, %368 > %392 = fadd float %380, %378 > %393 = fadd float %389, %385 > %394 = shl i32 %105, 4 > %395 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %394) > %396 = shl i32 %105, 4 > %397 = or i32 %396, 8 > %398 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %397) > %399 = fsub float -0.000000e+00, %311 > %400 = call float @llvm.fma.f32(float %395, float %398, float %399) > %401 = fmul float %400, %87 > %402 = fmul float %401, 2.000000e+00 > %403 = fmul float %87, %347 > %404 = fmul float %87, %343 > %405 = shl i32 %99, 5 > %406 = or i32 %405, 8 > %407 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %406) > %408 = fmul float %87, %407 > %409 = fadd float %390, %402 > %410 = fadd float %391, %295 > %411 = fadd float %392, %403 > %412 = fadd float %393, %408 > %413 = fmul float %409, %49 > %414 = fmul float %410, %50 > %415 = fadd float %413, %414 > %416 = fmul float %411, %51 > %417 = fadd float %415, %416 > %418 = fadd float %417, %412 > %419 = shl i32 %97, 5 > %420 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %419) > %421 = fmul float %85, %420 > %422 = shl i32 %98, 5 > %423 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %422) > %424 = fmul float %86, %423 > %425 = shl i32 %99, 5 > %426 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %425) > %427 = fmul float %87, %426 > %428 = shl i32 %101, 4 > %429 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %428) > %430 = shl i32 %101, 4 > %431 = or i32 %430, 4 > %432 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %431) > %433 = fsub float -0.000000e+00, %174 > %434 = call float @llvm.fma.f32(float %429, float %432, float %433) > %435 = fadd float %176, %175 > %436 = fmul float %434, %85 > %437 = fmul float %435, %85 > %438 = fmul float %436, 2.000000e+00 > %439 = fmul float %437, 2.000000e+00 > %440 = shl i32 %103, 4 > %441 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %440) > %442 = shl i32 %103, 4 > %443 = or i32 %442, 4 > %444 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %443) > %445 = fsub float -0.000000e+00, %225 > %446 = call float @llvm.fma.f32(float %441, float %444, float %445) > %447 = shl i32 %105, 4 > %448 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %447) > %449 = shl i32 %105, 4 > %450 = or i32 %449, 4 > %451 = call float @llvm.SI.load.const(<16 x i8> %45, i32 %450) > %452 = fsub float -0.000000e+00, %309 > %453 = call float @llvm.fma.f32(float %448, float %451, float %452) > %454 = fadd float %311, %310 > %455 = fmul float %446, %86 > %456 = fmul float %453, %87 > %457 = fmul float %454, %87 > %458 = fmul float %456, 2.000000e+00 > %459 = fmul float %457, 2.000000e+00 > %460 = fadd float %227, %226 > %461 = fmul float %460, %86 > %462 = fmul float %455, 
2.000000e+00 > %463 = fmul float %461, 2.000000e+00 > %464 = fadd float %379, %381 > %465 = fadd float %438, %462 > %466 = fadd float %439, %463 > %467 = fadd float %421, %424 > %468 = fadd float %404, %464 > %469 = fadd float %458, %465 > %470 = fadd float %459, %466 > %471 = fadd float %427, %467 > %472 = fmul float %468, %49 > %473 = fmul float %469, %50 > %474 = fadd float %472, %473 > %475 = fmul float %470, %51 > %476 = fadd float %474, %475 > %477 = fadd float %476, %471 > %478 = fmul float %23, %477 > %479 = fmul float %24, %358 > %480 = fadd float %478, %479 > %481 = fmul float %25, %418 > %482 = fadd float %480, %481 > %483 = fadd float %482, %26 > %484 = fmul float %27, %477 > %485 = fmul float %28, %358 > %486 = fadd float %484, %485 > %487 = fmul float %29, %418 > %488 = fadd float %486, %487 > %489 = fadd float %488, %30 > %490 = fmul float %31, %477 > %491 = fmul float %32, %358 > %492 = fadd float %490, %491 > %493 = fmul float %33, %418 > %494 = fadd float %492, %493 > %495 = fadd float %494, %34 > %496 = fmul float %35, %477 > %497 = fmul float %36, %358 > %498 = fadd float %496, %497 > %499 = fmul float %37, %418 > %500 = fadd float %498, %499 > %501 = fadd float %500, %38 > %502 = fsub float %39, %477 > %503 = fsub float %40, %358 > %504 = fsub float %41, %418 > %505 = fmul float %501, %42 > %506 = fmul float %501, %43 > %507 = fsub float -0.000000e+00, %43 > %508 = call float @llvm.fma.f32(float %483, float %42, float %505) > %509 = call float @llvm.fma.f32(float %489, float %507, float %506) > %510 = call float @llvm.fabs.f32(float %67) > %511 = call float @llvm.fma.f32(float %510, float 0xBF932DC600000000, float 0x3FB302C4E0000000) > %512 = call float @llvm.fabs.f32(float %67) > %513 = call float @llvm.fma.f32(float %511, float %512, float 0xBFCB269080000000) > %514 = call float @llvm.fabs.f32(float %67) > %515 = call float @llvm.fma.f32(float %513, float %514, float 0x3FF921B480000000) > %516 = call float @llvm.fabs.f32(float %67) > %517 = fsub float 1.000000e+00, %516 > %518 = call float @llvm.sqrt.f32(float %517) > %519 = fmul float %515, %518 > %520 = call float @llvm.fma.f32(float %519, float -2.000000e+00, float 0x400921FB60000000) > %521 = fsub float -0.000000e+00, %67 > %522 = fcmp olt float %67, %521 > %523 = select i1 %522, float %520, float 0.000000e+00 > %524 = call float @llvm.fma.f32(float %515, float %518, float %523) > %525 = call float @llvm.sin.f32(float %524) > %526 = fmul float %67, 5.000000e-01 > %527 = fmul float %525, 5.000000e-01 > %528 = fmul float %525, -5.000000e-01 > %529 = fmul float %349, %66 > %530 = fmul float %350, %67 > %531 = fadd float %530, %529 > %532 = fmul float %351, %68 > %533 = fadd float %531, %532 > %534 = fmul float %409, %66 > %535 = fmul float %410, %67 > %536 = fadd float %535, %534 > %537 = fmul float %411, %68 > %538 = fadd float %536, %537 > %539 = fmul float %468, %66 > %540 = fmul float %469, %67 > %541 = fadd float %540, %539 > %542 = fmul float %470, %68 > %543 = fadd float %541, %542 > %544 = fmul float %543, %502 > %545 = fmul float %533, %503 > %546 = fadd float %545, %544 > %547 = fmul float %538, %504 > %548 = fadd float %546, %547 > %549 = fmul float %349, %72 > %550 = fmul float %350, %73 > %551 = fadd float %550, %549 > %552 = fmul float %351, %74 > %553 = fadd float %551, %552 > %554 = fmul float %349, %55 > %555 = fmul float %350, %56 > %556 = fadd float %555, %554 > %557 = fmul float %351, %57 > %558 = fadd float %556, %557 > %559 = fmul float %409, %72 > %560 = fmul float %410, %73 > %561 = fadd 
float %560, %559 > %562 = fmul float %411, %74 > %563 = fadd float %561, %562 > %564 = fmul float %409, %55 > %565 = fmul float %410, %56 > %566 = fadd float %565, %564 > %567 = fmul float %411, %57 > %568 = fadd float %566, %567 > %569 = fmul float %468, %72 > %570 = fmul float %469, %73 > %571 = fadd float %570, %569 > %572 = fmul float %470, %74 > %573 = fadd float %571, %572 > %574 = fmul float %468, %55 > %575 = fmul float %469, %56 > %576 = fadd float %575, %574 > %577 = fmul float %470, %57 > %578 = fadd float %576, %577 > %579 = fmul float %578, %502 > %580 = fmul float %558, %503 > %581 = fadd float %580, %579 > %582 = fmul float %568, %504 > %583 = fadd float %581, %582 > %584 = fmul float %573, %502 > %585 = fmul float %553, %503 > %586 = fadd float %585, %584 > %587 = fmul float %563, %504 > %588 = fadd float %586, %587 > %589 = bitcast i32 %11 to float > %590 = insertvalue <{ float, float, float }> undef, float %589, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %61, float %62, float %61, float %62) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %508, float %509, float %495, float %501) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %78, float %79, float %80, float %81) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %526, float %527, float %528, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %548, float %588, float %583, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %483, float %489, float %495, float %501) > ret <{ float, float, float }> %590 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], BUFFER, FLOAT >DCL SVIEW[4], 2D, FLOAT >DCL CONST[1][0..22] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 336, 352, 304} >IMM[1] UINT32 {320, 288, 272, 256} >IMM[2] FLT32 { 2.0000, -1.0000, -0.1000, 0.0000} >IMM[3] INT32 {0, 0, 0, 0} >IMM[4] FLT32 {158456325028528675187087900672.0000, 0.0000, 0.0000, 0.0000} > 0: DP3 TEMP[0].x, IN[4].xyzz, IN[4].xyzz > 1: RSQ TEMP[1].x, TEMP[0].xxxx > 2: MUL TEMP[1].x, TEMP[1].xxxx, IN[4].zzzz > 3: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 4: MUL TEMP[2], IN[0], CONST[1][21] > 5: FMA TEMP[3].xy, CONST[1][22].xxxx, 
CONST[1][19].xyyy, TEMP[2].xyyy > 6: FMA TEMP[2].xy, CONST[1][22].xxxx, CONST[1][20].xyyy, TEMP[2].zwww > 7: MOV TEMP[4].xy, TEMP[3].xyyy > 8: TEX TEMP[4].yw, TEMP[4], SAMP[0], 2D > 9: MOV TEMP[3].xy, TEMP[3].xyyy > 10: TEX TEMP[3].y, TEMP[3], SAMP[1], 2D > 11: FMA TEMP[4].xy, TEMP[4].ywww, IMM[2].xxxx, IMM[2].yyyy > 12: MOV TEMP[5].xy, TEMP[2].xyyy > 13: TEX TEMP[5].yw, TEMP[5], SAMP[0], 2D > 14: MOV TEMP[2].xy, TEMP[2].xyyy > 15: TEX TEMP[2].w, TEMP[2], SAMP[1], 2D > 16: FMA TEMP[4].xy, TEMP[5].wyyy, IMM[2].xxxx, TEMP[4].xyyy > 17: ADD TEMP[4].xy, TEMP[4].xyyy, IMM[2].yyyy > 18: DP2 TEMP[4].x, IN[3].xyyy, TEMP[4].xyyy > 19: MOV_SAT TEMP[4].x, TEMP[4].xxxx > 20: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[1].xxxx > 21: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][18].xxxx > 22: MUL TEMP[1].xyz, TEMP[0].xxxx, CONST[1][17].xyzz > 23: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[2].xyzz > 24: MOV TEMP[4].xy, IN[0].xyyy > 25: TEX TEMP[4].x, TEMP[4], SAMP[2], 2D > 26: MUL TEMP[3].x, TEMP[3].yyyy, TEMP[4].xxxx > 27: MUL TEMP[2].x, TEMP[2].wwww, TEMP[3].xxxx > 28: FMA TEMP[1].xyz, CONST[1][16].xyzz, TEMP[2].xxxx, TEMP[1].xyzz > 29: MUL TEMP[2].x, TEMP[2].xxxx, IN[2].wwww > 30: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx > 31: FMA TEMP[2].x, CONST[1][17].wwww, TEMP[2].xxxx, IMM[2].zzzz > 32: MOV_SAT TEMP[2].x, TEMP[2].xxxx > 33: MOV TEMP[3].x, IMM[3].xxxx > 34: MOV TEMP[3].w, IMM[0].xxxx > 35: TXF TEMP[3].x, TEMP[3], SAMP[3], BUFFER > 36: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx > 37: FSEQ TEMP[3].xy, IN[1].wwww, IMM[2].wwww > 38: SSG TEMP[4].xy, IN[1].xyyy > 39: MUL TEMP[4].xy, IMM[4].xxxx, TEMP[4].xyyy > 40: RCP TEMP[5].xy, IN[1].wwww > 41: MUL TEMP[5].xy, IN[1].xyyy, TEMP[5].xyyy > 42: UCMP TEMP[3].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[5].xyyy > 43: MOV TEMP[3].xy, TEMP[3].xyyy > 44: TEX TEMP[3].x, TEMP[3], SAMP[4], 2D > 45: ADD TEMP[0].x, TEMP[3].xxxx, -IN[1].wwww > 46: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx > 47: FSNE TEMP[3].x, CONST[1][18].zzzz, IMM[2].wwww > 48: UIF TEMP[3].xxxx :0 > 49: RCP TEMP[3].x, CONST[1][18].zzzz > 50: MUL TEMP[3].x, TEMP[0].xxxx, TEMP[3].xxxx > 51: ELSE :0 > 52: SSG TEMP[0].x, TEMP[0].xxxx > 53: MUL TEMP[3].x, IMM[4].xxxx, TEMP[0].xxxx > 54: ENDIF > 55: MOV_SAT TEMP[0].x, TEMP[3].xxxx > 56: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx > 57: MOV TEMP[1].w, TEMP[0].xxxx > 58: MOV OUT[0], TEMP[1] > 59: END >radeonsi: Compiling shader 375 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, 
i32 280) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 352) > %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 > %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3 > %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 > %48 = extractelement <8 x i32> %44, i32 7 > %49 = extractelement <4 x i32> %47, i32 0 > %50 = and i32 %49, %48 > %51 = insertelement <4 x i32> %47, i32 %50, i32 0 > %52 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 > %54 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %55 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %54, i64 0, i64 7 > %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 > %57 = extractelement <8 x i32> %53, i32 7 > %58 = extractelement <4 x i32> %56, i32 0 > %59 = and i32 %58, %57 > %60 = insertelement <4 x i32> %56, i32 %59, i32 0 > %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 > %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11 > %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 > %66 = extractelement <8 x i32> %62, i32 7 > %67 = extractelement <4 x i32> %65, i32 0 > %68 = and i32 %67, %66 > %69 = insertelement <4 x i32> %65, i32 %68, i32 0 > %70 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %71 = bitcast <8 x i32> addrspace(2)* %70 to <2 x i128> addrspace(2)* > %72 = load <2 x i128>, <2 x i128> addrspace(2)* %71, align 32, !tbaa !0 > %73 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, align 32, !tbaa !0 > %75 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %76 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %75, i64 0, i64 19 > %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 > %78 = extractelement <8 x i32> %74, i32 7 > %79 = extractelement <4 x i32> %77, i32 0 > %80 = and i32 %79, %78 > %81 = insertelement <4 x i32> %77, i32 %80, i32 0 > %82 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 
x i32> %8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %88 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %89 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %90 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %91 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %92 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %93 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %94 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %95 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %98 = fmul float %95, %95 > %99 = fmul float %96, %96 > %100 = fadd float %99, %98 > %101 = fmul float %97, %97 > %102 = fadd float %100, %101 > %103 = call float @llvm.AMDGPU.rsq.clamped.f32(float %102) > %104 = fmul float %103, %97 > %105 = call float @llvm.AMDGPU.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) > %106 = fmul float %82, %38 > %107 = fmul float %83, %39 > %108 = fmul float %84, %40 > %109 = fmul float %85, %41 > %110 = call float @llvm.fma.f32(float %42, float %34, float %106) > %111 = call float @llvm.fma.f32(float %42, float %35, float %107) > %112 = call float @llvm.fma.f32(float %42, float %36, float %108) > %113 = call float @llvm.fma.f32(float %42, float %37, float %109) > %114 = bitcast float %110 to i32 > %115 = bitcast float %111 to i32 > %116 = insertelement <2 x i32> undef, i32 %114, i32 0 > %117 = insertelement <2 x i32> %116, i32 %115, i32 1 > %118 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %117, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %119 = extractelement <4 x float> %118, i32 1 > %120 = extractelement <4 x float> %118, i32 3 > %121 = bitcast float %110 to i32 > %122 = bitcast float %111 to i32 > %123 = insertelement <2 x i32> undef, i32 %121, i32 0 > %124 = insertelement <2 x i32> %123, i32 %122, i32 1 > %125 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %124, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %126 = extractelement <4 x float> %125, i32 1 > %127 = call float @llvm.fma.f32(float %119, float 2.000000e+00, float -1.000000e+00) > %128 = call float @llvm.fma.f32(float %120, float 2.000000e+00, float -1.000000e+00) > %129 = bitcast float %112 to i32 > %130 = bitcast float %113 to i32 > %131 = insertelement <2 x i32> undef, i32 %129, i32 0 > %132 = insertelement <2 x i32> %131, i32 %130, i32 1 > %133 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %132, <8 x i32> %44, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %134 = extractelement <4 x float> %133, i32 1 > %135 = extractelement <4 x float> %133, i32 3 > %136 = bitcast float %112 to i32 > %137 = bitcast float %113 to i32 > %138 = insertelement <2 x i32> undef, i32 %136, i32 0 > %139 = insertelement <2 x i32> %138, i32 %137, i32 1 > %140 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %139, <8 x i32> %53, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %141 = extractelement <4 x float> %140, i32 3 > %142 = call float @llvm.fma.f32(float %135, float 2.000000e+00, float %127) > %143 = call float @llvm.fma.f32(float %134, 
float 2.000000e+00, float %128) > %144 = fadd float %142, -1.000000e+00 > %145 = fadd float %143, -1.000000e+00 > %146 = fmul float %93, %144 > %147 = fmul float %94, %145 > %148 = fadd float %146, %147 > %149 = call float @llvm.AMDGPU.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) > %150 = fmul float %149, %105 > %151 = fmul float %150, %32 > %152 = fmul float %151, %28 > %153 = fmul float %151, %29 > %154 = fmul float %151, %30 > %155 = fmul float %152, %89 > %156 = fmul float %153, %90 > %157 = fmul float %154, %91 > %158 = bitcast float %82 to i32 > %159 = bitcast float %83 to i32 > %160 = insertelement <2 x i32> undef, i32 %158, i32 0 > %161 = insertelement <2 x i32> %160, i32 %159, i32 1 > %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %62, <4 x i32> %69, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %163 = extractelement <4 x float> %162, i32 0 > %164 = fmul float %126, %163 > %165 = fmul float %141, %164 > %166 = call float @llvm.fma.f32(float %25, float %165, float %155) > %167 = call float @llvm.fma.f32(float %26, float %165, float %156) > %168 = call float @llvm.fma.f32(float %27, float %165, float %157) > %169 = fmul float %165, %92 > %170 = fmul float %169, %169 > %171 = call float @llvm.fma.f32(float %31, float %170, float 0xBFB99999A0000000) > %172 = call float @llvm.AMDGPU.clamp.(float %171, float 0.000000e+00, float 1.000000e+00) > %173 = extractelement <2 x i128> %72, i32 1 > %174 = bitcast i128 %173 to <16 x i8> > %175 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %174, i32 0, i32 0) > %176 = extractelement <4 x float> %175, i32 0 > %177 = fmul float %166, %176 > %178 = fmul float %167, %176 > %179 = fmul float %168, %176 > %180 = fcmp oeq float %88, 0.000000e+00 > %181 = fcmp oeq float %88, 0.000000e+00 > %182 = fcmp ogt float %86, 0.000000e+00 > %183 = select i1 %182, float 1.000000e+00, float %86 > %184 = fcmp oge float %183, 0.000000e+00 > %185 = fcmp ogt float %87, 0.000000e+00 > %186 = select i1 %185, float 1.000000e+00, float %87 > %187 = fcmp oge float %186, 0.000000e+00 > %.op = fmul float %183, 0x4600000000000000 > %188 = select i1 %184, float %.op, float 0xC600000000000000 > %.op24 = fmul float %186, 0x4600000000000000 > %189 = select i1 %187, float %.op24, float 0xC600000000000000 > %190 = fdiv float 1.000000e+00, %88 > %191 = fmul float %86, %190 > %192 = fmul float %87, %190 > %193 = select i1 %180, float %188, float %191 > %194 = select i1 %181, float %189, float %192 > %195 = bitcast float %193 to i32 > %196 = bitcast float %194 to i32 > %197 = insertelement <2 x i32> undef, i32 %195, i32 0 > %198 = insertelement <2 x i32> %197, i32 %196, i32 1 > %199 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %198, <8 x i32> %74, <4 x i32> %81, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %200 = extractelement <4 x float> %199, i32 0 > %201 = fsub float %200, %88 > %202 = fmul float %201, %172 > %203 = fcmp une float %33, 0.000000e+00 > br i1 %203, label %IF, label %ELSE > >IF: ; preds = %main_body > %204 = fdiv float 1.000000e+00, %33 > %205 = fmul float %202, %204 > br label %ENDIF > >ELSE: ; preds = %main_body > %206 = fcmp ogt float %202, 0.000000e+00 > %207 = select i1 %206, float 1.000000e+00, float %202 > %208 = fcmp oge float %207, 0.000000e+00 > %.op25 = fmul float %207, 0x4600000000000000 > %209 = select i1 %208, float %.op25, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp12.0 = phi float [ %205, %IF ], [ %209, %ELSE ] > %210 = call float 
@llvm.AMDGPU.clamp.(float %temp12.0, float 0.000000e+00, float 1.000000e+00) > %211 = fmul float %210, %172 > %212 = bitcast float %5 to i32 > %213 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %212, 10 > %214 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %213, float %177, 11 > %215 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %214, float %178, 12 > %216 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %215, float %179, 13 > %217 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %216, float %211, 14 > %218 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %217, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %218 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #2 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..36] >DCL TEMP[0..3], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, -0.1500, 0.0000} >IMM[1] UINT32 {0, 304, 320, 336} >IMM[2] UINT32 {352, 384, 480, 576} >IMM[3] UINT32 {560, 368, 400, 512} >IMM[4] UINT32 {432, 416, 496, 448} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[7] UINT32 {464, 0, 0, 0} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, 0.0000} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][19], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][20], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][21], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][22], TEMP[0] > 8: MOV TEMP[1].w, TEMP[0].xxxx > 9: MOV TEMP[2], TEMP[1] > 10: MUL TEMP[3].xy, CONST[1][36].xyyy, IMM[0].xyyy > 11: MUL TEMP[0].xy, TEMP[0].xxxx, CONST[1][36].xyyy 
> 12: FMA TEMP[0].xy, TEMP[1].xyyy, TEMP[3].xyyy, TEMP[0].xyyy > 13: MOV TEMP[0].zw, TEMP[1].wwzw > 14: FSNE TEMP[1].x, CONST[1][26].xxxx, IMM[0].wwww > 15: UIF TEMP[1].xxxx :0 > 16: ELSE :0 > 17: ENDIF > 18: FSNE TEMP[1].x, CONST[1][29].wwww, IMM[0].wwww > 19: UIF TEMP[1].xxxx :0 > 20: ELSE :0 > 21: ENDIF > 22: FSNE TEMP[1].x, CONST[1][26].yyyy, IMM[0].wwww > 23: UIF TEMP[1].xxxx :0 > 24: ELSE :0 > 25: ENDIF > 26: MOV OUT[2], IN[1] > 27: MOV OUT[1], TEMP[0] > 28: MOV OUT[0], TEMP[2] > 29: END >radeonsi: Compiling shader 376 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { >main_body: > %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !tbaa !0 > %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 304) > %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 308) > %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 312) > %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 316) > %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 320) > %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 324) > %23 = call float @llvm.SI.load.const(<16 x i8> %16, i32 328) > %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 332) > %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 336) > %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 340) > %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 344) > %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 348) > %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 352) > %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 356) > %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 360) > %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 364) > %33 = call float @llvm.SI.load.const(<16 x i8> %16, i32 576) > %34 = call float @llvm.SI.load.const(<16 x i8> %16, i32 580) > %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 > %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %13) > %38 = extractelement <4 x float> %37, i32 0 > %39 = extractelement <4 x float> %37, i32 1 > %40 = extractelement <4 x float> %37, i32 2 > %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 > %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %14) > %44 = extractelement <4 x float> %43, i32 0 > %45 = extractelement <4 x float> %43, i32 1 > %46 = extractelement <4 x float> %43, i32 2 > %47 = extractelement <4 x float> %43, i32 3 > %48 = fmul float %17, %38 > %49 = fmul float %18, %39 > %50 = fadd float %48, %49 > %51 = fmul float %19, %40 > %52 = fadd float %50, %51 > %53 = fadd float %52, %20 > %54 = fmul float %21, %38 > %55 = fmul float %22, %39 > %56 = fadd float %54, %55 > %57 = fmul float %23, %40 > %58 = fadd float %56, %57 > %59 = fadd float %58, %24 > %60 = fmul float %25, %38 > %61 = fmul float %26, %39 > %62 = fadd float %60, %61 > %63 = fmul float %27, %40 > %64 = fadd float %62, 
%63 > %65 = fadd float %64, %28 > %66 = fmul float %29, %38 > %67 = fmul float %30, %39 > %68 = fadd float %66, %67 > %69 = fmul float %31, %40 > %70 = fadd float %68, %69 > %71 = fadd float %70, %32 > %72 = fsub float -0.000000e+00, %34 > %73 = fmul float %71, %33 > %74 = fmul float %71, %34 > %75 = call float @llvm.fma.f32(float %53, float %33, float %73) > %76 = call float @llvm.fma.f32(float %59, float %72, float %74) > %77 = bitcast i32 %11 to float > %78 = insertvalue <{ float, float, float }> undef, float %77, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %65, float %71) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %44, float %45, float %46, float %47) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %53, float %59, float %65, float %71) > ret <{ float, float, float }> %78 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL OUT[1], COLOR[1] >DCL OUT[2], COLOR[2] >DCL OUT[3], COLOR[3] >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SAMP[3] >DCL SAMP[4] >DCL SAMP[5] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL SVIEW[4], 3D, FLOAT >DCL SVIEW[5], 3D, FLOAT >DCL CONST[1][0..18] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, 2.0000, -1.0000} >IMM[1] FLT32 { 0.0208, -0.0851, 0.1801, -0.3303} >IMM[2] FLT32 { 0.9999, -2.0000, 1.5708, 0.1592} >IMM[3] INT32 {1, 0, 0, 0} >IMM[4] UINT32 {3226013659, 0, 288, 256} >IMM[5] UINT32 {240, 272, 0, 0} >IMM[6] FLT32 { 1.0000, 0.5000, 3.0000, -0.5000} >IMM[7] FLT32 { 0.3300, -0.2500, 255.0000, 99.0000} >IMM[8] FLT32 { 0.8750, 0.0625, 0.0000, 0.0000} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: FMA TEMP[1].xy, TEMP[0].yxxx, IMM[0].zzzz, IMM[0].wwww > 7: ABS TEMP[2].x, TEMP[1].yyyy > 8: ABS TEMP[3].x, TEMP[1].xxxx > 9: MAX TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx > 10: FSNE TEMP[3].x, TEMP[2].xxxx, IMM[0].xxxx > 11: UIF TEMP[3].xxxx :0 > 12: RCP TEMP[3].x, TEMP[2].xxxx > 13: ELSE :0 > 14: MOV TEMP[3].x, IMM[0].yyyy > 15: ENDIF > 16: ABS TEMP[4].x, TEMP[1].yyyy > 17: ABS TEMP[5].x, TEMP[1].xxxx > 18: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx > 19: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[4].xxxx > 20: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[2].xxxx > 21: FMA TEMP[4].x, TEMP[3].xxxx, IMM[1].xxxx, IMM[1].yyyy > 22: FMA TEMP[4].x, TEMP[3].xxxx, TEMP[4].xxxx, IMM[1].zzzz > 23: FMA TEMP[4].x, TEMP[3].xxxx, TEMP[4].xxxx, IMM[1].wwww > 24: FMA TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx, 
IMM[2].xxxx > 25: MUL TEMP[4].x, TEMP[3].xxxx, TEMP[2].xxxx > 26: FMA TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy, IMM[2].zzzz > 27: ABS TEMP[5].x, TEMP[1].yyyy > 28: ABS TEMP[6].x, TEMP[1].xxxx > 29: FSLT TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx > 30: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx > 31: INEG TEMP[5].x, TEMP[5].xxxx > 32: AND TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx > 33: FMA TEMP[3].x, TEMP[2].xxxx, TEMP[3].xxxx, TEMP[4].xxxx > 34: FSLT TEMP[4].x, TEMP[1].yyyy, -TEMP[1].yyyy > 35: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx > 36: INEG TEMP[4].x, TEMP[4].xxxx > 37: AND TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx > 38: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[3].xxxx > 39: MIN TEMP[3].x, TEMP[1].yyyy, TEMP[1].xxxx > 40: FSLT TEMP[3].x, TEMP[3].xxxx, -TEMP[3].xxxx > 41: AND TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx > 42: INEG TEMP[3].x, TEMP[3].xxxx > 43: MAX TEMP[4].x, TEMP[1].yyyy, TEMP[1].xxxx > 44: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy > 45: SQRT TEMP[1].x, TEMP[1].xxxx > 46: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx > 47: FSGE TEMP[4].x, TEMP[4].xxxx, -TEMP[4].xxxx > 48: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx > 49: INEG TEMP[4].x, TEMP[4].xxxx > 50: AND TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx > 51: MOV TEMP[4].x, -TEMP[2].xxxx > 52: USNE TEMP[3].x, TEMP[3].xxxx, IMM[4].yyyy > 53: UIF TEMP[3].xxxx :0 > 54: MOV TEMP[3].x, TEMP[4].xxxx > 55: ELSE :0 > 56: MOV TEMP[3].x, TEMP[2].xxxx > 57: ENDIF > 58: MUL TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww > 59: MOV TEMP[1].y, TEMP[3].xxxx > 60: FMA TEMP[3].xy, CONST[1][18].wwww, CONST[1][18].yzzz, TEMP[1].xyyy > 61: FMA TEMP[2].xy, CONST[1][18].wwww, CONST[1][16].xyyy, TEMP[1].xyyy > 62: MOV TEMP[4].xy, TEMP[2].xyyy > 63: TEX TEMP[4].xy, TEMP[4], SAMP[0], 2D > 64: MUL TEMP[2].xy, TEMP[4].xyyy, CONST[1][15].wwww > 65: MUL TEMP[1].x, TEMP[3].xxxx, CONST[1][17].wwww > 66: MUL TEMP[3].x, TEMP[3].yyyy, CONST[1][18].xxxx > 67: MOV TEMP[1].y, TEMP[3].xxxx > 68: MOV TEMP[3].xy, TEMP[1].xyyy > 69: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D > 70: FMA TEMP[3].xy, TEMP[3].ywww, IMM[0].zzzz, IMM[0].wwww > 71: ADD TEMP[4].xy, -CONST[1][17].xyyy, IMM[6].xxxx > 72: MUL TEMP[4].xy, TEMP[4].xyyy, IMM[6].yyyy > 73: FMA TEMP[4].xy, TEMP[0].xyyy, CONST[1][17].xyyy, TEMP[4].xyyy > 74: FMA TEMP[3].xy, TEMP[3].xyyy, CONST[1][17].zzzz, TEMP[4].xyyy > 75: FMA TEMP[3].xy, TEMP[3].xyyy, IMM[0].zzzz, IMM[0].wwww > 76: DP2 TEMP[3].x, TEMP[3].xyyy, TEMP[3].xyyy > 77: ADD TEMP[3].x, TEMP[3].xxxx, -CONST[1][15].xxxx > 78: FSNE TEMP[4].x, CONST[1][15].yyyy, IMM[0].xxxx > 79: UIF TEMP[4].xxxx :0 > 80: RCP TEMP[4].x, CONST[1][15].yyyy > 81: ELSE :0 > 82: MOV TEMP[4].x, IMM[0].yyyy > 83: ENDIF > 84: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx > 85: MOV_SAT TEMP[3].x, TEMP[3].xxxx > 86: FMA TEMP[4].x, TEMP[3].xxxx, IMM[2].yyyy, IMM[6].zzzz > 87: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx > 88: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx > 89: ADD TEMP[4].xy, TEMP[0].xyyy, IMM[6].wwww > 90: MUL TEMP[4].xy, TEMP[4].xyyy, IMM[7].xxxx > 91: FMA TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy, TEMP[0].xyyy > 92: ADD TEMP[5].x, -CONST[1][16].wwww, IMM[6].xxxx > 93: MUL TEMP[5].x, TEMP[5].xxxx, IMM[6].yyyy > 94: FMA TEMP[5].xy, TEMP[4].xyyy, CONST[1][16].wwww, TEMP[5].xxxx > 95: ADD TEMP[2].xy, -TEMP[4].xyyy, TEMP[5].xyyy > 96: FMA TEMP[5].xy, TEMP[4].xyyy, IMM[0].zzzz, IMM[0].wwww > 97: DP2 TEMP[5].x, TEMP[5].xyyy, TEMP[5].xyyy > 98: ADD TEMP[5].x, TEMP[5].xxxx, IMM[7].yyyy > 99: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx >100: FMA TEMP[0].xy, TEMP[5].xxxx, TEMP[2].xyyy, TEMP[4].xyyy >101: MOV TEMP[2].xy, TEMP[0].xyyy >102: TEX 
TEMP[2].xyz, TEMP[2], SAMP[2], 2D >103: MOV TEMP[4].xy, TEMP[0].xyyy >104: TEX TEMP[4].w, TEMP[4], SAMP[3], 2D >105: MUL TEMP[0].x, TEMP[4].wwww, IMM[7].zzzz >106: ROUND TEMP[0].x, TEMP[0].xxxx >107: FSLT TEMP[0].x, IMM[7].wwww, TEMP[0].xxxx >108: AND TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx >109: INEG TEMP[0].x, TEMP[0].xxxx >110: FMA TEMP[1].xyz, TEMP[2].xyzz, IMM[8].xxxx, IMM[8].yyyy >111: MOV TEMP[4].xyz, TEMP[1].xyzz >112: MOV TEMP[4].w, IMM[0].xxxx >113: TXL TEMP[4].xyz, TEMP[4], SAMP[4], 3D >114: MOV TEMP[1].xyz, TEMP[1].xyzz >115: MOV TEMP[1].w, IMM[0].xxxx >116: TXL TEMP[1].xyz, TEMP[1], SAMP[5], 3D >117: ADD TEMP[4].xyz, -TEMP[1].xyzz, TEMP[4].xyzz >118: FMA TEMP[1].xyz, TEMP[3].xxxx, TEMP[4].xyzz, TEMP[1].xyzz >119: ADD TEMP[1].xyz, -TEMP[2].xyzz, TEMP[1].xyzz >120: MOV_SAT TEMP[3].x, CONST[1][16].zzzz >121: MOV_SAT TEMP[4].x, CONST[1][15].zzzz >122: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].xxxx >123: USNE TEMP[0].x, TEMP[0].xxxx, IMM[4].yyyy >124: UIF TEMP[0].xxxx :0 >125: MOV TEMP[0].x, TEMP[4].xxxx >126: ELSE :0 >127: MOV TEMP[0].x, TEMP[3].xxxx >128: ENDIF >129: FMA TEMP[0].xyz, TEMP[0].xxxx, TEMP[1].xyzz, TEMP[2].xyzz >130: MOV TEMP[0].w, IN[1].wwww >131: MOV OUT[0], TEMP[0] >132: MOV OUT[1], IMM[0].xxxx >133: MOV OUT[2], IMM[0].xxxx >134: MOV OUT[3], IMM[0].xxxx >135: END >radeonsi: Compiling shader 377 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 240) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 248) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 256) > %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 260) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 264) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 268) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 272) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 276) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 280) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 284) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 288) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 292) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 296) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 300) > %41 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 > %43 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %44 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %43, i64 0, i64 3 > %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, 
align 16, !tbaa !0 > %46 = extractelement <8 x i32> %42, i32 7 > %47 = extractelement <4 x i32> %45, i32 0 > %48 = and i32 %47, %46 > %49 = insertelement <4 x i32> %45, i32 %48, i32 0 > %50 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 > %52 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %53 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %52, i64 0, i64 7 > %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 > %55 = extractelement <8 x i32> %51, i32 7 > %56 = extractelement <4 x i32> %54, i32 0 > %57 = and i32 %56, %55 > %58 = insertelement <4 x i32> %54, i32 %57, i32 0 > %59 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 > %61 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %61, i64 0, i64 11 > %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 > %64 = extractelement <8 x i32> %60, i32 7 > %65 = extractelement <4 x i32> %63, i32 0 > %66 = and i32 %65, %64 > %67 = insertelement <4 x i32> %63, i32 %66, i32 0 > %68 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 > %70 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %71 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %70, i64 0, i64 15 > %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 > %73 = extractelement <8 x i32> %69, i32 7 > %74 = extractelement <4 x i32> %72, i32 0 > %75 = and i32 %74, %73 > %76 = insertelement <4 x i32> %72, i32 %75, i32 0 > %77 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8 > %78 = load <8 x i32>, <8 x i32> addrspace(2)* %77, align 32, !tbaa !0 > %79 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %79, i64 0, i64 19 > %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 > %82 = extractelement <8 x i32> %78, i32 7 > %83 = extractelement <4 x i32> %81, i32 0 > %84 = and i32 %83, %82 > %85 = insertelement <4 x i32> %81, i32 %84, i32 0 > %86 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10 > %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 > %88 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %89 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %88, i64 0, i64 23 > %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 > %91 = extractelement <8 x i32> %87, i32 7 > %92 = extractelement <4 x i32> %90, i32 0 > %93 = and i32 %92, %91 > %94 = insertelement <4 x i32> %90, i32 %93, i32 0 > %95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %97 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %98 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %99 = fcmp oeq float %97, 0.000000e+00 > %100 = fcmp oeq float %97, 0.000000e+00 > %101 = fcmp ogt float %95, 0.000000e+00 > %102 = select i1 %101, float 1.000000e+00, float %95 > %103 = fcmp oge float %102, 0.000000e+00 > %104 = 
fcmp ogt float %96, 0.000000e+00 > %105 = select i1 %104, float 1.000000e+00, float %96 > %106 = fcmp oge float %105, 0.000000e+00 > %.op = fmul float %102, 0x4600000000000000 > %107 = select i1 %103, float %.op, float 0xC600000000000000 > %.op38 = fmul float %105, 0x4600000000000000 > %108 = select i1 %106, float %.op38, float 0xC600000000000000 > %109 = fdiv float 1.000000e+00, %97 > %110 = fmul float %95, %109 > %111 = fmul float %96, %109 > %112 = select i1 %99, float %107, float %110 > %113 = select i1 %100, float %108, float %111 > %114 = call float @llvm.fma.f32(float %113, float 2.000000e+00, float -1.000000e+00) > %115 = call float @llvm.fma.f32(float %112, float 2.000000e+00, float -1.000000e+00) > %116 = call float @llvm.fabs.f32(float %115) > %117 = call float @llvm.fabs.f32(float %114) > %118 = call float @llvm.maxnum.f32(float %116, float %117) > %119 = fcmp une float %118, 0.000000e+00 > %120 = fdiv float 1.000000e+00, %118 > %temp12.0 = select i1 %119, float %120, float 0x4600000000000000 > %121 = call float @llvm.fabs.f32(float %115) > %122 = call float @llvm.fabs.f32(float %114) > %123 = call float @llvm.minnum.f32(float %121, float %122) > %124 = fmul float %temp12.0, %123 > %125 = fmul float %124, %124 > %126 = call float @llvm.fma.f32(float %125, float 0x3F9555CBE0000000, float 0xBFB5CB46C0000000) > %127 = call float @llvm.fma.f32(float %125, float %126, float 0x3FC70EDC40000000) > %128 = call float @llvm.fma.f32(float %125, float %127, float 0xBFD523A080000000) > %129 = call float @llvm.fma.f32(float %125, float %128, float 0x3FEFFEE700000000) > %130 = fmul float %129, %124 > %131 = call float @llvm.fma.f32(float %130, float -2.000000e+00, float 0x3FF921FB60000000) > %132 = call float @llvm.fabs.f32(float %115) > %133 = call float @llvm.fabs.f32(float %114) > %134 = fcmp olt float %132, %133 > %135 = select i1 %134, float %131, float 0.000000e+00 > %136 = call float @llvm.fma.f32(float %124, float %129, float %135) > %137 = fsub float -0.000000e+00, %115 > %138 = fcmp olt float %115, %137 > %139 = select i1 %138, float 0xC00921FB60000000, float 0.000000e+00 > %140 = fadd float %139, %136 > %141 = call float @llvm.minnum.f32(float %115, float %114) > %142 = fsub float -0.000000e+00, %141 > %143 = fcmp olt float %141, %142 > %144 = call float @llvm.maxnum.f32(float %115, float %114) > %145 = fmul float %114, %114 > %146 = fmul float %115, %115 > %147 = fadd float %145, %146 > %148 = call float @llvm.sqrt.f32(float %147) > %149 = fadd float %148, %148 > %150 = fsub float -0.000000e+00, %144 > %151 = fcmp oge float %144, %150 > %152 = and i1 %151, %143 > %153 = fsub float -0.000000e+00, %140 > %. 
= select i1 %152, float %153, float %140 > %154 = fmul float %., 0x3FC45F57C0000000 > %155 = call float @llvm.fma.f32(float %40, float %38, float %149) > %156 = call float @llvm.fma.f32(float %40, float %39, float %154) > %157 = call float @llvm.fma.f32(float %40, float %29, float %149) > %158 = call float @llvm.fma.f32(float %40, float %30, float %154) > %159 = bitcast float %157 to i32 > %160 = bitcast float %158 to i32 > %161 = insertelement <2 x i32> undef, i32 %159, i32 0 > %162 = insertelement <2 x i32> %161, i32 %160, i32 1 > %163 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %162, <8 x i32> %42, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %164 = extractelement <4 x float> %163, i32 0 > %165 = extractelement <4 x float> %163, i32 1 > %166 = fmul float %164, %28 > %167 = fmul float %165, %28 > %168 = fmul float %155, %36 > %169 = fmul float %156, %37 > %170 = bitcast float %168 to i32 > %171 = bitcast float %169 to i32 > %172 = insertelement <2 x i32> undef, i32 %170, i32 0 > %173 = insertelement <2 x i32> %172, i32 %171, i32 1 > %174 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %173, <8 x i32> %51, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %175 = extractelement <4 x float> %174, i32 1 > %176 = extractelement <4 x float> %174, i32 3 > %177 = call float @llvm.fma.f32(float %175, float 2.000000e+00, float -1.000000e+00) > %178 = call float @llvm.fma.f32(float %176, float 2.000000e+00, float -1.000000e+00) > %179 = fsub float 1.000000e+00, %33 > %180 = fsub float 1.000000e+00, %34 > %181 = fmul float %179, 5.000000e-01 > %182 = fmul float %180, 5.000000e-01 > %183 = call float @llvm.fma.f32(float %112, float %33, float %181) > %184 = call float @llvm.fma.f32(float %113, float %34, float %182) > %185 = call float @llvm.fma.f32(float %177, float %35, float %183) > %186 = call float @llvm.fma.f32(float %178, float %35, float %184) > %187 = call float @llvm.fma.f32(float %185, float 2.000000e+00, float -1.000000e+00) > %188 = call float @llvm.fma.f32(float %186, float 2.000000e+00, float -1.000000e+00) > %189 = fmul float %187, %187 > %190 = fmul float %188, %188 > %191 = fadd float %189, %190 > %192 = fsub float %191, %25 > %193 = fcmp une float %26, 0.000000e+00 > %194 = fdiv float 1.000000e+00, %26 > %temp16.0 = select i1 %193, float %194, float 0x4600000000000000 > %195 = fmul float %temp16.0, %192 > %196 = call float @llvm.AMDGPU.clamp.(float %195, float 0.000000e+00, float 1.000000e+00) > %197 = call float @llvm.fma.f32(float %196, float -2.000000e+00, float 3.000000e+00) > %198 = fmul float %196, %196 > %199 = fmul float %198, %197 > %200 = fadd float %112, -5.000000e-01 > %201 = fadd float %113, -5.000000e-01 > %202 = fmul float %200, 0x3FD51EB860000000 > %203 = fmul float %201, 0x3FD51EB860000000 > %204 = call float @llvm.fma.f32(float %166, float %202, float %112) > %205 = call float @llvm.fma.f32(float %167, float %203, float %113) > %206 = fsub float 1.000000e+00, %32 > %207 = fmul float %206, 5.000000e-01 > %208 = call float @llvm.fma.f32(float %204, float %32, float %207) > %209 = call float @llvm.fma.f32(float %205, float %32, float %207) > %210 = fsub float %208, %204 > %211 = fsub float %209, %205 > %212 = call float @llvm.fma.f32(float %204, float 2.000000e+00, float -1.000000e+00) > %213 = call float @llvm.fma.f32(float %205, float 2.000000e+00, float -1.000000e+00) > %214 = fmul float %212, %212 > %215 = fmul float %213, %213 > %216 = fadd float %214, %215 > %217 = fadd float %216, 
-2.500000e-01 > %218 = fmul float %217, %217 > %219 = call float @llvm.fma.f32(float %218, float %210, float %204) > %220 = call float @llvm.fma.f32(float %218, float %211, float %205) > %221 = bitcast float %219 to i32 > %222 = bitcast float %220 to i32 > %223 = insertelement <2 x i32> undef, i32 %221, i32 0 > %224 = insertelement <2 x i32> %223, i32 %222, i32 1 > %225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %224, <8 x i32> %60, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %226 = extractelement <4 x float> %225, i32 0 > %227 = extractelement <4 x float> %225, i32 1 > %228 = extractelement <4 x float> %225, i32 2 > %229 = bitcast float %219 to i32 > %230 = bitcast float %220 to i32 > %231 = insertelement <2 x i32> undef, i32 %229, i32 0 > %232 = insertelement <2 x i32> %231, i32 %230, i32 1 > %233 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %232, <8 x i32> %69, <4 x i32> %76, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %234 = extractelement <4 x float> %233, i32 3 > %235 = fmul float %234, 2.550000e+02 > %236 = call float @llvm.rint.f32(float %235) > %237 = fcmp ogt float %236, 9.900000e+01 > %238 = call float @llvm.fma.f32(float %226, float 8.750000e-01, float 6.250000e-02) > %239 = call float @llvm.fma.f32(float %227, float 8.750000e-01, float 6.250000e-02) > %240 = call float @llvm.fma.f32(float %228, float 8.750000e-01, float 6.250000e-02) > %241 = bitcast float %238 to i32 > %242 = bitcast float %239 to i32 > %243 = bitcast float %240 to i32 > %244 = insertelement <4 x i32> undef, i32 %241, i32 0 > %245 = insertelement <4 x i32> %244, i32 %242, i32 1 > %246 = insertelement <4 x i32> %245, i32 %243, i32 2 > %247 = insertelement <4 x i32> %246, i32 0, i32 3 > %248 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %247, <8 x i32> %78, <4 x i32> %85, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %249 = extractelement <4 x float> %248, i32 0 > %250 = extractelement <4 x float> %248, i32 1 > %251 = extractelement <4 x float> %248, i32 2 > %252 = bitcast float %238 to i32 > %253 = bitcast float %239 to i32 > %254 = bitcast float %240 to i32 > %255 = insertelement <4 x i32> undef, i32 %252, i32 0 > %256 = insertelement <4 x i32> %255, i32 %253, i32 1 > %257 = insertelement <4 x i32> %256, i32 %254, i32 2 > %258 = insertelement <4 x i32> %257, i32 0, i32 3 > %259 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %258, <8 x i32> %87, <4 x i32> %94, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %260 = extractelement <4 x float> %259, i32 0 > %261 = extractelement <4 x float> %259, i32 1 > %262 = extractelement <4 x float> %259, i32 2 > %263 = fsub float %249, %260 > %264 = fsub float %250, %261 > %265 = fsub float %251, %262 > %266 = call float @llvm.fma.f32(float %199, float %263, float %260) > %267 = call float @llvm.fma.f32(float %199, float %264, float %261) > %268 = call float @llvm.fma.f32(float %199, float %265, float %262) > %269 = fsub float %266, %226 > %270 = fsub float %267, %227 > %271 = fsub float %268, %228 > %272 = call float @llvm.AMDGPU.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) > %273 = call float @llvm.AMDGPU.clamp.(float %27, float 0.000000e+00, float 1.000000e+00) > %274 = fmul float %273, %272 > %.37 = select i1 %237, float %274, float %272 > %275 = call float @llvm.fma.f32(float %.37, float %269, float %226) > %276 = call float @llvm.fma.f32(float %.37, float %270, float %227) > %277 = call float @llvm.fma.f32(float %.37, float %271, float 
%228) > %278 = bitcast float %5 to i32 > %279 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %278, 10 > %280 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %279, float %275, 11 > %281 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %280, float %276, 12 > %282 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %281, float %277, 13 > %283 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %282, float %98, 14 > %284 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %283, float 0.000000e+00, 15 > %285 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %284, float 0.000000e+00, 16 > %286 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %285, float 0.000000e+00, 17 > %287 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %286, float 0.000000e+00, 18 > %288 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %287, float 0.000000e+00, 19 > %289 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %288, float 0.000000e+00, 20 > %290 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %289, float 0.000000e+00, 21 > %291 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %290, float 0.000000e+00, 22 > %292 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %291, float 0.000000e+00, 23 > %293 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %292, float 0.000000e+00, 24 > %294 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> %293, float 0.000000e+00, 25 > %295 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %294, float 0.000000e+00, 26 > %296 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %295, float %21, 27 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %296 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 > >; Function Attrs: nounwind readnone >declare float @llvm.rint.f32(float) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..38] >DCL TEMP[0..7], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.1500} >IMM[1] UINT32 {0, 368, 384, 400} >IMM[2] UINT32 {416, 448, 544, 608} >IMM[3] UINT32 {288, 592, 432, 464} >IMM[4] UINT32 {576, 496, 480, 560} >IMM[5] FLT32 { 0.0597, -1.5000, 0.0000, 158456325028528675187087900672.0000} >IMM[6] FLT32 { 1.4427, 0.5000, 0.4545, -0.0040} >IMM[7] UINT32 {512, 528, 0, 0} >IMM[8] FLT32 { 6.2000, 1.7000, 0.0600, -0.0187} >IMM[9] FLT32 { 0.0743, -0.2121, 1.5707, -2.0000} >IMM[10] FLT32 { 3.1416, 0.5000, -0.5000, 0.0000} >IMM[11] INT32 {1, 0, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][23], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][24], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][25], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][26], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[3], TEMP[1] > 10: MOV TEMP[4].zw, TEMP[1].wwzw > 11: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[1][38].xyyy > 12: MUL TEMP[5].xy, CONST[1][38].xyyy, IMM[0].xyyy > 13: FMA TEMP[4].xy, TEMP[1].xyyy, TEMP[5].xyyy, TEMP[2].xyyy > 14: ADD TEMP[1].xyz, 
-IN[0].xyzz, CONST[1][37].xyzz > 15: FSNE TEMP[2].x, CONST[1][30].xxxx, IMM[5].zzzz > 16: UIF TEMP[2].xxxx :0 > 17: ELSE :0 > 18: ENDIF > 19: FSNE TEMP[2].x, CONST[1][33].wwww, IMM[5].zzzz > 20: UIF TEMP[2].xxxx :0 > 21: ELSE :0 > 22: ENDIF > 23: FSNE TEMP[2].x, CONST[1][30].yyyy, IMM[5].zzzz > 24: UIF TEMP[2].xxxx :0 > 25: ELSE :0 > 26: ENDIF > 27: ABS TEMP[2].x, IN[3].yyyy > 28: FMA TEMP[0].x, TEMP[2].xxxx, IMM[8].wwww, IMM[9].xxxx > 29: ABS TEMP[2].x, IN[3].yyyy > 30: FMA TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[9].yyyy > 31: ABS TEMP[2].x, IN[3].yyyy > 32: FMA TEMP[2].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[9].zzzz > 33: ABS TEMP[5].x, IN[3].yyyy > 34: ADD TEMP[5].x, -TEMP[5].xxxx, IMM[0].xxxx > 35: SQRT TEMP[5].x, TEMP[5].xxxx > 36: MUL TEMP[6].x, TEMP[5].xxxx, TEMP[2].xxxx > 37: FMA TEMP[6].x, TEMP[6].xxxx, IMM[9].wwww, IMM[10].xxxx > 38: FSLT TEMP[7].x, IN[3].yyyy, -IN[3].yyyy > 39: AND TEMP[7].x, TEMP[7].xxxx, IMM[11].xxxx > 40: INEG TEMP[7].x, TEMP[7].xxxx > 41: AND TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx > 42: FMA TEMP[0].x, TEMP[2].xxxx, TEMP[5].xxxx, TEMP[6].xxxx > 43: SIN TEMP[2].xy, TEMP[0].xxxx > 44: MOV TEMP[0].yz, TEMP[2].yxyy > 45: MOV TEMP[0].x, IN[3].yyyy > 46: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[10].yyzz > 47: DP3 TEMP[2].x, IN[3].xyzz, TEMP[1].xyzz > 48: DP3 TEMP[5].x, IN[4].xyzz, TEMP[1].xyzz > 49: MOV TEMP[2].y, TEMP[5].xxxx > 50: DP3 TEMP[1].x, IN[1].xyzz, TEMP[1].xyzz > 51: MOV TEMP[2].z, TEMP[1].xxxx > 52: MOV OUT[5], TEMP[2] > 53: MOV OUT[4], TEMP[0] > 54: MOV OUT[3], IN[5] > 55: MOV OUT[1], IN[2].xyxy > 56: MOV OUT[2], TEMP[4] > 57: MOV OUT[0], TEMP[3] > 58: END >radeonsi: Compiling shader 378 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 368) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 372) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 376) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 380) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 384) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 388) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 392) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 396) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 400) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 404) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 408) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 412) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 416) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 420) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 424) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 428) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 592) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 596) > %39 = call float @llvm.SI.load.const(<16 x i8> %20, i32 600) > %40 = call float @llvm.SI.load.const(<16 x i8> %20, i32 608) > %41 = call 
float @llvm.SI.load.const(<16 x i8> %20, i32 612) > %42 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 > %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %13) > %45 = extractelement <4 x float> %44, i32 0 > %46 = extractelement <4 x float> %44, i32 1 > %47 = extractelement <4 x float> %44, i32 2 > %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 > %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %14) > %51 = extractelement <4 x float> %50, i32 0 > %52 = extractelement <4 x float> %50, i32 1 > %53 = extractelement <4 x float> %50, i32 2 > %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 > %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %15) > %57 = extractelement <4 x float> %56, i32 0 > %58 = extractelement <4 x float> %56, i32 1 > %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 > %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %16) > %62 = extractelement <4 x float> %61, i32 0 > %63 = extractelement <4 x float> %61, i32 1 > %64 = extractelement <4 x float> %61, i32 2 > %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 > %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %17) > %68 = extractelement <4 x float> %67, i32 0 > %69 = extractelement <4 x float> %67, i32 1 > %70 = extractelement <4 x float> %67, i32 2 > %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 > %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %18) > %74 = extractelement <4 x float> %73, i32 0 > %75 = extractelement <4 x float> %73, i32 1 > %76 = extractelement <4 x float> %73, i32 2 > %77 = extractelement <4 x float> %73, i32 3 > %78 = fmul float %21, %45 > %79 = fmul float %22, %46 > %80 = fadd float %78, %79 > %81 = fmul float %23, %47 > %82 = fadd float %80, %81 > %83 = fadd float %82, %24 > %84 = fmul float %25, %45 > %85 = fmul float %26, %46 > %86 = fadd float %84, %85 > %87 = fmul float %27, %47 > %88 = fadd float %86, %87 > %89 = fadd float %88, %28 > %90 = fmul float %29, %45 > %91 = fmul float %30, %46 > %92 = fadd float %90, %91 > %93 = fmul float %31, %47 > %94 = fadd float %92, %93 > %95 = fadd float %94, %32 > %96 = fmul float %33, %45 > %97 = fmul float %34, %46 > %98 = fadd float %96, %97 > %99 = fmul float %35, %47 > %100 = fadd float %98, %99 > %101 = fadd float %100, %36 > %102 = fmul float %101, %40 > %103 = fmul float %101, %41 > %104 = fsub float -0.000000e+00, %41 > %105 = call float @llvm.fma.f32(float %83, float %40, float %102) > %106 = call float @llvm.fma.f32(float %89, float %104, float %103) > %107 = fsub float %37, %45 > %108 = fsub float %38, %46 > %109 = fsub float %39, %47 > %110 = call float @llvm.fabs.f32(float %63) > %111 = call float @llvm.fma.f32(float %110, float 0xBF932DC600000000, float 0x3FB302C4E0000000) > %112 = call float @llvm.fabs.f32(float %63) > %113 = call float @llvm.fma.f32(float %111, float 
%112, float 0xBFCB269080000000) > %114 = call float @llvm.fabs.f32(float %63) > %115 = call float @llvm.fma.f32(float %113, float %114, float 0x3FF921B480000000) > %116 = call float @llvm.fabs.f32(float %63) > %117 = fsub float 1.000000e+00, %116 > %118 = call float @llvm.sqrt.f32(float %117) > %119 = fmul float %118, %115 > %120 = call float @llvm.fma.f32(float %119, float -2.000000e+00, float 0x400921FB60000000) > %121 = fsub float -0.000000e+00, %63 > %122 = fcmp olt float %63, %121 > %123 = select i1 %122, float %120, float 0.000000e+00 > %124 = call float @llvm.fma.f32(float %115, float %118, float %123) > %125 = call float @llvm.sin.f32(float %124) > %126 = fmul float %63, 5.000000e-01 > %127 = fmul float %125, 5.000000e-01 > %128 = fmul float %125, -5.000000e-01 > %129 = fmul float %62, %107 > %130 = fmul float %63, %108 > %131 = fadd float %130, %129 > %132 = fmul float %64, %109 > %133 = fadd float %131, %132 > %134 = fmul float %68, %107 > %135 = fmul float %69, %108 > %136 = fadd float %135, %134 > %137 = fmul float %70, %109 > %138 = fadd float %136, %137 > %139 = fmul float %51, %107 > %140 = fmul float %52, %108 > %141 = fadd float %140, %139 > %142 = fmul float %53, %109 > %143 = fadd float %141, %142 > %144 = bitcast i32 %11 to float > %145 = insertvalue <{ float, float, float }> undef, float %144, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %57, float %58, float %57, float %58) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %105, float %106, float %95, float %101) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %74, float %75, float %76, float %77) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %126, float %127, float %128, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %133, float %138, float %143, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %83, float %89, float %95, float %101) > ret <{ float, float, float }> %145 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sqrt.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.sin.f32(float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL IN[5] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL OUT[5], GENERIC[4] >DCL CONST[1][0..21] >DCL TEMP[0..5], LOCAL >IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[1] UINT32 {0, 256, 272, 288} >IMM[2] UINT32 {304, 336, 0, 0} > 0: MOV TEMP[0].xyz, IN[0].xyzx > 1: MOV TEMP[0].w, IMM[0].xxxx > 2: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 3: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 4: MOV TEMP[1].y, TEMP[2].xxxx > 5: DP4 TEMP[2].x, CONST[1][18], TEMP[0] > 6: MOV TEMP[1].z, TEMP[2].xxxx > 7: DP4 TEMP[2].x, 
CONST[1][19], TEMP[0] > 8: MOV TEMP[1].w, TEMP[2].xxxx > 9: MOV TEMP[3].zw, TEMP[1].wwzw > 10: MUL TEMP[0].xy, TEMP[2].xxxx, CONST[1][21].xyyy > 11: MUL TEMP[2].xy, CONST[1][21].xyyy, IMM[0].xyyy > 12: FMA TEMP[3].xy, TEMP[1].xyyy, TEMP[2].xyyy, TEMP[0].xyyy > 13: MOV TEMP[2].xy, IN[2].xyxx > 14: DP3 TEMP[0].x, IN[3].xyzz, CONST[1][16].xyzz > 15: DP3 TEMP[4].x, IN[4].xyzz, CONST[1][16].xyzz > 16: MOV TEMP[0].y, TEMP[4].xxxx > 17: DP3 TEMP[4].x, IN[1].xyzz, CONST[1][16].xyzz > 18: MOV TEMP[0].z, TEMP[4].xxxx > 19: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz > 20: RSQ TEMP[4].x, TEMP[4].xxxx > 21: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[0].xyzz > 22: DP3 TEMP[0].x, IN[3].xyzz, CONST[1][17].xyzz > 23: DP3 TEMP[5].x, IN[4].xyzz, CONST[1][17].xyzz > 24: MOV TEMP[0].y, TEMP[5].xxxx > 25: DP3 TEMP[5].x, IN[1].xyzz, CONST[1][17].xyzz > 26: MOV TEMP[0].z, TEMP[5].xxxx > 27: DP3 TEMP[5].x, TEMP[0].xyzz, TEMP[0].xyzz > 28: RSQ TEMP[5].x, TEMP[5].xxxx > 29: MUL TEMP[0].xyz, TEMP[5].xxxx, TEMP[0].xyzz > 30: MOV OUT[5], TEMP[0] > 31: MOV OUT[4], TEMP[4] > 32: MOV OUT[3], IN[5] > 33: MOV OUT[2], TEMP[2] > 34: MOV OUT[1], TEMP[3] > 35: MOV OUT[0], TEMP[1] > 36: END >radeonsi: Compiling shader 379 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 > %21 = call float @llvm.SI.load.const(<16 x i8> %20, i32 256) > %22 = call float @llvm.SI.load.const(<16 x i8> %20, i32 260) > %23 = call float @llvm.SI.load.const(<16 x i8> %20, i32 264) > %24 = call float @llvm.SI.load.const(<16 x i8> %20, i32 268) > %25 = call float @llvm.SI.load.const(<16 x i8> %20, i32 272) > %26 = call float @llvm.SI.load.const(<16 x i8> %20, i32 276) > %27 = call float @llvm.SI.load.const(<16 x i8> %20, i32 280) > %28 = call float @llvm.SI.load.const(<16 x i8> %20, i32 284) > %29 = call float @llvm.SI.load.const(<16 x i8> %20, i32 288) > %30 = call float @llvm.SI.load.const(<16 x i8> %20, i32 292) > %31 = call float @llvm.SI.load.const(<16 x i8> %20, i32 296) > %32 = call float @llvm.SI.load.const(<16 x i8> %20, i32 300) > %33 = call float @llvm.SI.load.const(<16 x i8> %20, i32 304) > %34 = call float @llvm.SI.load.const(<16 x i8> %20, i32 308) > %35 = call float @llvm.SI.load.const(<16 x i8> %20, i32 312) > %36 = call float @llvm.SI.load.const(<16 x i8> %20, i32 316) > %37 = call float @llvm.SI.load.const(<16 x i8> %20, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %20, i32 340) > %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 > %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %13) > %42 = extractelement <4 x float> %41, i32 0 > %43 = extractelement <4 x float> %41, i32 1 > %44 = extractelement <4 x float> %41, i32 2 > %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x 
i8> %46, i32 0, i32 %14) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 > %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %15) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x float> %53, i32 1 > %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 > %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %16) > %59 = extractelement <4 x float> %58, i32 0 > %60 = extractelement <4 x float> %58, i32 1 > %61 = extractelement <4 x float> %58, i32 2 > %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 > %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %17) > %65 = extractelement <4 x float> %64, i32 0 > %66 = extractelement <4 x float> %64, i32 1 > %67 = extractelement <4 x float> %64, i32 2 > %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5 > %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 > %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %18) > %71 = extractelement <4 x float> %70, i32 0 > %72 = extractelement <4 x float> %70, i32 1 > %73 = extractelement <4 x float> %70, i32 2 > %74 = extractelement <4 x float> %70, i32 3 > %75 = fmul float %21, %42 > %76 = fmul float %22, %43 > %77 = fadd float %75, %76 > %78 = fmul float %23, %44 > %79 = fadd float %77, %78 > %80 = fadd float %79, %24 > %81 = fmul float %25, %42 > %82 = fmul float %26, %43 > %83 = fadd float %81, %82 > %84 = fmul float %27, %44 > %85 = fadd float %83, %84 > %86 = fadd float %85, %28 > %87 = fmul float %29, %42 > %88 = fmul float %30, %43 > %89 = fadd float %87, %88 > %90 = fmul float %31, %44 > %91 = fadd float %89, %90 > %92 = fadd float %91, %32 > %93 = fmul float %33, %42 > %94 = fmul float %34, %43 > %95 = fadd float %93, %94 > %96 = fmul float %35, %44 > %97 = fadd float %95, %96 > %98 = fadd float %97, %36 > %99 = fmul float %98, %37 > %100 = fmul float %98, %38 > %101 = fsub float -0.000000e+00, %38 > %102 = call float @llvm.fma.f32(float %80, float %37, float %99) > %103 = call float @llvm.fma.f32(float %86, float %101, float %100) > %104 = fmul float %59, %21 > %105 = fmul float %60, %22 > %106 = fadd float %105, %104 > %107 = fmul float %61, %23 > %108 = fadd float %106, %107 > %109 = fmul float %65, %21 > %110 = fmul float %66, %22 > %111 = fadd float %110, %109 > %112 = fmul float %67, %23 > %113 = fadd float %111, %112 > %114 = fmul float %48, %21 > %115 = fmul float %49, %22 > %116 = fadd float %115, %114 > %117 = fmul float %50, %23 > %118 = fadd float %116, %117 > %119 = fmul float %108, %108 > %120 = fmul float %113, %113 > %121 = fadd float %120, %119 > %122 = fmul float %118, %118 > %123 = fadd float %121, %122 > %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) > %125 = fmul float %124, %108 > %126 = fmul float %124, %113 > %127 = fmul float %124, %118 > %128 = fmul float %59, %25 > %129 = fmul float %60, %26 > %130 = fadd float %129, %128 > %131 = fmul float %61, %27 > %132 = fadd float %130, %131 > %133 = fmul float %65, %25 > %134 = fmul float %66, %26 > %135 = fadd 
float %134, %133 > %136 = fmul float %67, %27 > %137 = fadd float %135, %136 > %138 = fmul float %48, %25 > %139 = fmul float %49, %26 > %140 = fadd float %139, %138 > %141 = fmul float %50, %27 > %142 = fadd float %140, %141 > %143 = fmul float %132, %132 > %144 = fmul float %137, %137 > %145 = fadd float %144, %143 > %146 = fmul float %142, %142 > %147 = fadd float %145, %146 > %148 = call float @llvm.AMDGPU.rsq.clamped.f32(float %147) > %149 = fmul float %148, %132 > %150 = fmul float %148, %137 > %151 = fmul float %148, %142 > %152 = bitcast i32 %11 to float > %153 = insertvalue <{ float, float, float }> undef, float %152, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %102, float %103, float %92, float %98) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %54, float %55, float undef, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %71, float %72, float %73, float %74) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %125, float %126, float %127, float undef) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %149, float %150, float %151, float 1.000000e+00) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %86, float %92, float %98) > ret <{ float, float, float }> %153 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL IN[4], GENERIC[4], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL SAMP[2] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL CONST[1][0..20] >DCL TEMP[0..6], LOCAL >IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} >IMM[1] UINT32 {0, 320, 240, 0} >IMM[2] FLT32 {158456325028528675187087900672.0000, 0.0000, 0.0000, 0.0000} > 0: MOV TEMP[0].xy, IN[1].xyyy > 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D > 2: FMA TEMP[1].xy, TEMP[0].ywww, IMM[0].xxxx, IMM[0].yyyy > 3: ADD TEMP[2].xy, TEMP[0].ywww, TEMP[0].ywww > 4: FMA TEMP[0].x, -TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz > 5: FMA TEMP[0].x, -TEMP[1].yyyy, TEMP[1].yyyy, TEMP[0].xxxx > 6: ABS TEMP[3].x, TEMP[1].yyyy > 7: ABS TEMP[1].x, TEMP[1].xxxx > 8: MAX TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx > 9: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[0].zzzz > 10: MUL TEMP[1].xyz, TEMP[1].xxxx, IN[2].xyzz > 11: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][20].xyzz > 12: SQRT TEMP[3].x, TEMP[0].xxxx > 13: MOV TEMP[2].z, TEMP[3].xxxx > 14: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy > 15: DP3 TEMP[3].x, IN[3].xyzz, TEMP[2].xyzz > 16: DP3 TEMP[4].x, IN[4].xyzz, TEMP[2].xyzz > 17: MOV TEMP[3].y, TEMP[4].xxxx > 
18: FSEQ TEMP[4].xy, IN[0].wwww, IMM[0].wwww > 19: SSG TEMP[5].xy, IN[0].xyyy > 20: MUL TEMP[5].xy, IMM[2].xxxx, TEMP[5].xyyy > 21: RCP TEMP[6].xy, IN[0].wwww > 22: MUL TEMP[6].xy, IN[0].xyyy, TEMP[6].xyyy > 23: UCMP TEMP[2].xy, TEMP[4].xyyy, TEMP[5].xyyy, TEMP[6].xyyy > 24: MUL TEMP[4].x, CONST[1][15].yyyy, CONST[1][20].wwww > 25: MOV TEMP[4].y, -TEMP[4].xxxx > 26: FMA TEMP[2].xy, TEMP[3].xyyy, TEMP[4].xyyy, TEMP[2].xyyy > 27: MOV TEMP[2].xy, TEMP[2].xyyy > 28: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D > 29: FMA TEMP[1].xyz, TEMP[1].xyzz, CONST[1][20].wwww, TEMP[2].xyzz > 30: MOV TEMP[2].xy, IN[1].xyyy > 31: TEX TEMP[2].y, TEMP[2], SAMP[2], 2D > 32: MUL TEMP[0].x, TEMP[2].yyyy, IN[2].wwww > 33: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][20].wwww > 34: MOV TEMP[1].w, TEMP[0].xxxx > 35: MOV OUT[0], TEMP[1] > 36: END >radeonsi: Compiling shader 380 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 244) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) > %30 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 > %32 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %33 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %32, i64 0, i64 3 > %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 > %35 = extractelement <8 x i32> %31, i32 7 > %36 = extractelement <4 x i32> %34, i32 0 > %37 = and i32 %36, %35 > %38 = insertelement <4 x i32> %34, i32 %37, i32 0 > %39 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 > %41 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %42 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %41, i64 0, i64 7 > %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 > %44 = extractelement <8 x i32> %40, i32 7 > %45 = extractelement <4 x i32> %43, i32 0 > %46 = and i32 %45, %44 > %47 = insertelement <4 x i32> %43, i32 %46, i32 0 > %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 > %50 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %50, i64 0, i64 11 > %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 > %53 = extractelement <8 x i32> %49, i32 7 
> %54 = extractelement <4 x i32> %52, i32 0 > %55 = and i32 %54, %53 > %56 = insertelement <4 x i32> %52, i32 %55, i32 0 > %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %59 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) > %60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %62 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) > %63 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) > %64 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) > %65 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %66 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %67 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %68 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %69 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) > %70 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) > %71 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) > %72 = bitcast float %60 to i32 > %73 = bitcast float %61 to i32 > %74 = insertelement <2 x i32> undef, i32 %72, i32 0 > %75 = insertelement <2 x i32> %74, i32 %73, i32 1 > %76 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %75, <8 x i32> %31, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %77 = extractelement <4 x float> %76, i32 1 > %78 = extractelement <4 x float> %76, i32 3 > %79 = call float @llvm.fma.f32(float %77, float 2.000000e+00, float -1.000000e+00) > %80 = call float @llvm.fma.f32(float %78, float 2.000000e+00, float -1.000000e+00) > %81 = fadd float %77, %77 > %82 = fadd float %78, %78 > %83 = fsub float -0.000000e+00, %79 > %84 = call float @llvm.fma.f32(float %83, float %79, float 1.000000e+00) > %85 = fsub float -0.000000e+00, %80 > %86 = call float @llvm.fma.f32(float %85, float %80, float %84) > %87 = call float @llvm.fabs.f32(float %80) > %88 = call float @llvm.fabs.f32(float %79) > %89 = call float @llvm.maxnum.f32(float %87, float %88) > %90 = fsub float 1.000000e+00, %89 > %91 = fmul float %90, %62 > %92 = fmul float %90, %63 > %93 = fmul float %90, %64 > %94 = fmul float %91, %26 > %95 = fmul float %92, %27 > %96 = fmul float %93, %28 > %97 = call float @llvm.sqrt.f32(float %86) > %98 = fadd float %81, -1.000000e+00 > %99 = fadd float %82, -1.000000e+00 > %100 = fadd float %97, -1.000000e+00 > %101 = fmul float %66, %98 > %102 = fmul float %67, %99 > %103 = fadd float %102, %101 > %104 = fmul float %68, %100 > %105 = fadd float %103, %104 > %106 = fmul float %69, %98 > %107 = fmul float %70, %99 > %108 = fadd float %107, %106 > %109 = fmul float %71, %100 > %110 = fadd float %108, %109 > %111 = fcmp oeq float %59, 0.000000e+00 > %112 = fcmp oeq float %59, 0.000000e+00 > %113 = fcmp ogt float %57, 0.000000e+00 > %114 = select i1 %113, float 1.000000e+00, float %57 > %115 = fcmp oge float %114, 0.000000e+00 > %116 = fcmp ogt float %58, 0.000000e+00 > %117 = select i1 %116, float 1.000000e+00, float %58 > %118 = fcmp oge float %117, 0.000000e+00 > %.op = fmul float %114, 0x4600000000000000 > %119 = select i1 %115, float %.op, float 0xC600000000000000 > %.op28 = fmul float %117, 0x4600000000000000 > %120 = select i1 %118, float %.op28, float 0xC600000000000000 > %121 = fdiv float 1.000000e+00, %59 > %122 = fmul float %57, 
%121 > %123 = fmul float %58, %121 > %124 = select i1 %111, float %119, float %122 > %125 = select i1 %112, float %120, float %123 > %126 = fmul float %25, %29 > %127 = fsub float -0.000000e+00, %126 > %128 = call float @llvm.fma.f32(float %105, float %126, float %124) > %129 = call float @llvm.fma.f32(float %110, float %127, float %125) > %130 = bitcast float %128 to i32 > %131 = bitcast float %129 to i32 > %132 = insertelement <2 x i32> undef, i32 %130, i32 0 > %133 = insertelement <2 x i32> %132, i32 %131, i32 1 > %134 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %133, <8 x i32> %40, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %135 = extractelement <4 x float> %134, i32 0 > %136 = extractelement <4 x float> %134, i32 1 > %137 = extractelement <4 x float> %134, i32 2 > %138 = call float @llvm.fma.f32(float %94, float %29, float %135) > %139 = call float @llvm.fma.f32(float %95, float %29, float %136) > %140 = call float @llvm.fma.f32(float %96, float %29, float %137) > %141 = bitcast float %60 to i32 > %142 = bitcast float %61 to i32 > %143 = insertelement <2 x i32> undef, i32 %141, i32 0 > %144 = insertelement <2 x i32> %143, i32 %142, i32 1 > %145 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %144, <8 x i32> %49, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %146 = extractelement <4 x float> %145, i32 1 > %147 = fmul float %146, %65 > %148 = fmul float %147, %29 > %149 = bitcast float %5 to i32 > %150 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %149, 10 > %151 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %150, float %138, 11 > %152 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %151, float %139, 12 > %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %152, float %140, 13 > %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %148, 14 > %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #1 > >; Function Attrs: nounwind readnone >declare float 
@llvm.sqrt.f32(float) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >radeonsi: Compiling shader 381 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %46, 20 > %48 = add i32 %15, %12 > %49 = bitcast i32 %48 to float > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %49, 21 > %51 = add i32 %15, %12 > %52 = bitcast i32 %51 to float > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %52, 22 > %54 = add i32 %15, %12 > %55 = bitcast i32 %54 to float > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %55, 23 > %57 = add i32 %15, %12 > %58 = bitcast i32 %57 to float > %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %58, 24 > %60 = add i32 %15, %12 > %61 = bitcast i32 %60 to float > %62 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %59, float %61, 25 > %63 = add i32 %15, %12 > %64 = bitcast i32 %63 to float > %65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %62, float %64, 26 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }> %65 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 
; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 > v_mov_b32_e32 v10, v4 ; 7E140304 > v_mov_b32_e32 v11, v4 ; 7E160304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_load_dwordx4 s[12:15], s[10:11], 0x14 ; C0860B14 > s_load_dwordx4 s[16:19], s[10:11], 0x18 ; C0880B18 > s_load_dwordx4 s[8:11], s[10:11], 0x1c ; C0840B1C > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[18:21], v7, s[20:23], 0 idxen ; E00C2000 80051207 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[21:24], v9, s[12:15], 0 idxen ; E00C2000 80031509 > buffer_load_format_xyzw v[25:28], v10, s[16:19], 0 idxen ; E00C2000 8004190A > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[8:11], v11, s[8:11], 0 idxen ; E00C2000 8002080B > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C > s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 > s_buffer_load_dword s26, s[0:3], 0x24 ; C20D0124 > s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D > s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 > s_buffer_load_dword s27, s[0:3], 0x25 ; C20D8125 > s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E > s_buffer_load_dword s25, s[0:3], 0x22 ; C20C8122 > s_buffer_load_dword s28, s[0:3], 0x26 ; C20E0126 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s29, s[0:3], 0x28 ; C20E8128 > s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 > s_buffer_load_dword s31, s[0:3], 0x2a ; C20F812A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, v0, v9 ; 10121300 > v_mul_f32_e32 v1, v0, v10 ; 10021500 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_cvt_i32_f32_e32 v8, v9 ; 7E101109 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v8, 5, v8 ; 34101085 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_or_b32_e32 v10, 4, v8 ; 38141084 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v9, 4, v1 ; 38120284 > buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A > v_or_b32_e32 v11, 16, v8 ; 38161090 > v_or_b32_e32 v28, 28, v8 ; 3838109C > v_or_b32_e32 v29, 20, v8 ; 383A1094 > v_or_b32_e32 v30, 4, v0 ; 383C0084 > v_or_b32_e32 v31, 24, v8 ; 383E1098 > buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B > 
buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E > buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F > v_or_b32_e32 v33, 28, v1 ; 3842029C > v_or_b32_e32 v34, 24, v1 ; 38440298 > v_or_b32_e32 v32, 16, v1 ; 38400290 > v_or_b32_e32 v35, 20, v1 ; 38460294 > buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 > buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 > buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 > buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 > v_or_b32_e32 v37, 28, v0 ; 384A009C > v_or_b32_e32 v38, 24, v0 ; 384C0098 > v_or_b32_e32 v36, 16, v0 ; 38480090 > v_or_b32_e32 v39, 20, v0 ; 384E0094 > buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 > buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 > buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 > buffer_load_dword v39, v39, s[4:7], 0 offen ; E0301000 80012727 > v_or_b32_e32 v41, 8, v8 ; 38521088 > buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 > v_or_b32_e32 v40, 8, v1 ; 38500288 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > v_or_b32_e32 v42, 8, v0 ; 38540088 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > buffer_load_dword v40, v40, s[4:7], 0 offen ; E0301000 80012828 > buffer_load_dword v41, v41, s[4:7], 0 offen ; E0301000 80012929 > buffer_load_dword v42, v42, s[4:7], 0 offen ; E0301000 80012A2A > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s0, s[0:3], 0x2b ; C200012B > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v10, v10, v26 ; 1014350A > v_mul_f32_e32 v48, v11, v11 ; 1060170B > v_mac_f32_e32 v10, v9, v25 ; 3E143309 > v_mul_f32_e32 v9, v28, v11 ; 1012171C > v_mac_f32_e32 v10, v30, v27 ; 3E14371E > s_waitcnt vmcnt(14) ; BF8C0F7E > v_fma_f32 v30, v29, v31, -v9 ; D296001E 84263F1D > v_mul_f32_e32 v43, v26, v30 ; 10563D1A > v_mac_f32_e32 v43, v26, v30 ; 3E563D1A > v_mul_f32_e32 v30, v28, v31 ; 103C3F1C > v_fma_f32 v44, v11, v29, v30 ; D296002C 047A3B0B > v_mul_f32_e32 v45, v26, v44 ; 105A591A > v_mac_f32_e32 v45, v26, v44 ; 3E5A591A > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mul_f32_e32 v44, v33, v34 ; 10584521 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_fma_f32 v46, v32, v35, v44 ; D296002E 04B24720 > v_mul_f32_e32 v46, v25, v46 ; 105C5D19 > v_mad_f32 v49, v31, v31, v48 ; D2820031 04C23F1F > v_mac_f32_e32 v45, 2.0, v46 ; 3E5A5CF4 > v_mul_f32_e32 v46, v32, v32 ; 105C4120 > v_mad_f32 v47, v34, v34, v46 ; D282002F 04BA4522 > v_fma_f32 v49, -v49, 2.0, 1.0 ; D2960031 23C9E931 > v_fma_f32 v47, -v47, 2.0, 1.0 ; D296002F 23C9E92F > v_mul_f32_e32 v49, v49, v26 ; 10623531 > v_mac_f32_e32 v49, v47, v25 ; 3E62332F > v_mul_f32_e32 v47, v33, v32 ; 105E4121 > v_fma_f32 v50, v35, v34, -v47 ; D2960032 84BE4523 > v_mul_f32_e32 v50, v25, v50 ; 10646519 > v_mac_f32_e32 v43, 2.0, v50 ; 3E5664F4 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v50, v37, v38 ; 10644D25 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_fma_f32 v51, v36, v39, v50 ; D2960033 04CA4F24 > v_mul_f32_e32 v51, v27, v51 ; 1066671B > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mul_f32_e32 v8, v8, v26 ; 10103508 > v_mac_f32_e32 v45, 2.0, v51 ; 3E5A66F4 > v_mul_f32_e32 v51, v36, v36 
; 10664924 > v_mad_f32 v52, v38, v38, v51 ; D2820034 04CE4D26 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v8, v1, v25 ; 3E103301 > v_mul_f32_e32 v1, v33, v35 ; 10024721 > v_fma_f32 v52, -v52, 2.0, 1.0 ; D2960034 23C9E934 > v_fma_f32 v33, v32, v35, -v44 ; D2960021 84B24720 > v_fma_f32 v44, v32, v34, -v1 ; D296002C 84064520 > v_mac_f32_e32 v1, v32, v34 ; 3E024520 > v_mul_f32_e32 v32, v37, v39 ; 10404F25 > v_mul_f32_e32 v28, v28, v29 ; 10383B1C > v_fma_f32 v30, v11, v29, -v30 ; D296001E 847A3B0B > v_mac_f32_e32 v49, v52, v27 ; 3E623734 > v_mul_f32_e32 v52, v37, v36 ; 10684925 > v_fma_f32 v37, v36, v39, -v50 ; D2960025 84CA4F24 > v_fma_f32 v50, v36, v38, -v32 ; D2960032 84824D24 > v_mac_f32_e32 v32, v36, v38 ; 3E404D24 > v_fma_f32 v36, v11, v31, -v28 ; D2960024 84723F0B > v_mac_f32_e32 v28, v11, v31 ; 3E383F0B > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v8, v0, v27 ; 3E103700 > v_mul_f32_e32 v0, v26, v30 ; 10003D1A > v_mul_f32_e32 v11, v26, v28 ; 1016391A > v_mac_f32_e32 v0, v26, v30 ; 3E003D1A > v_mul_f32_e32 v30, v29, v29 ; 103C3B1D > v_mac_f32_e32 v30, v31, v31 ; 3E3C3F1F > v_mac_f32_e32 v11, v26, v28 ; 3E16391A > v_mul_f32_e32 v28, v35, v35 ; 10384723 > v_mul_f32_e32 v1, v25, v1 ; 10020319 > v_mac_f32_e32 v28, v34, v34 ; 3E384522 > v_fma_f32 v30, -v30, 2.0, 1.0 ; D296001E 23C9E91E > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v39, v39 ; 10024F27 > v_mac_f32_e32 v1, v38, v38 ; 3E024D26 > v_fma_f32 v28, -v28, 2.0, 1.0 ; D296001C 23C9E91C > v_mul_f32_e32 v30, v30, v26 ; 103C351E > v_mac_f32_e32 v30, v28, v25 ; 3E3C331C > v_mul_f32_e32 v28, v25, v33 ; 10384319 > v_fma_f32 v1, -v1, 2.0, 1.0 ; D2960001 23C9E901 > v_mac_f32_e32 v0, 2.0, v28 ; 3E0038F4 > v_mac_f32_e32 v30, v1, v27 ; 3E3C3701 > v_mul_f32_e32 v1, v27, v37 ; 10024B1B > v_mac_f32_e32 v0, 2.0, v1 ; 3E0002F4 > v_mul_f32_e32 v1, v27, v32 ; 1002411B > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v13, v0 ; 1002010D > v_mac_f32_e32 v1, v12, v30 ; 3E023D0C > v_mac_f32_e32 v1, v14, v11 ; 3E02170E > v_add_f32_e32 v1, v8, v1 ; 06020308 > v_fma_f32 v8, v35, v34, v47 ; D2960008 04BE4523 > v_fma_f32 v9, v29, v31, v9 ; D2960009 04263F1D > v_mul_f32_e32 v31, v25, v8 ; 103E1119 > v_mac_f32_e32 v46, v35, v35 ; 3E5C4723 > v_mac_f32_e32 v48, v29, v29 ; 3E603B1D > v_mul_f32_e32 v29, v25, v44 ; 103A5919 > v_mac_f32_e32 v31, v25, v8 ; 3E3E1119 > v_fma_f32 v8, -v46, 2.0, 1.0 ; D2960008 23C9E92E > v_fma_f32 v28, v39, v38, v52 ; D296001C 04D24D27 > v_mul_f32_e32 v9, v26, v9 ; 1012131A > v_mac_f32_e32 v51, v39, v39 ; 3E664F27 > v_mac_f32_e32 v29, v25, v44 ; 3E3A5919 > v_mul_f32_e32 v8, v8, v25 ; 10103308 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v25, v40, v25 ; 10323328 > v_fma_f32 v32, -v48, 2.0, 1.0 ; D2960020 23C9E930 > v_fma_f32 v53, v39, v38, -v52 ; D2960035 84D24D27 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v25, v41, v26 ; 3E323529 > v_mac_f32_e32 v8, v32, v26 ; 3E103520 > v_fma_f32 v32, -v51, 2.0, 1.0 ; D2960020 23C9E933 > v_mul_f32_e32 v26, v26, v36 ; 1034491A > v_mul_f32_e32 v28, v27, v28 ; 1038391B > v_mac_f32_e32 v31, 2.0, v9 ; 3E3E12F4 > v_mul_f32_e32 v53, v27, v53 ; 106A6B1B > v_mac_f32_e32 v31, 2.0, v28 ; 3E3E38F4 > v_mac_f32_e32 v8, v32, v27 ; 3E103720 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v25, v42, v27 ; 3E32372A > v_mul_f32_e32 v27, v27, v50 ; 1036651B > v_mac_f32_e32 v29, 2.0, v26 ; 3E3A34F4 > v_mac_f32_e32 v29, 2.0, v27 ; 3E3A36F4 > v_mul_f32_e32 v9, v13, v31 ; 10123F0D > v_mac_f32_e32 v9, v12, v29 ; 3E123B0C > v_mac_f32_e32 v9, v14, v8 ; 3E12110E > v_mac_f32_e32 v43, 2.0, v53 ; 
3E566AF4 > v_mul_f32_e32 v53, v13, v49 ; 106A630D > v_mac_f32_e32 v53, v12, v45 ; 3E6A5B0C > v_add_f32_e32 v9, v25, v9 ; 06121319 > v_mov_b32_e32 v25, 1.0 ; 7E3202F2 > v_mac_f32_e32 v53, v14, v43 ; 3E6A570E > exp 15, 32, 0, 0, 0, v3, v4, v14, v25 ; F800020F 190E0403 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v3, v18, v45 ; 10065B12 > v_mul_f32_e32 v4, v18, v29 ; 10083B12 > v_mul_f32_e32 v14, v18, v30 ; 101C3D12 > v_mac_f32_e32 v3, v19, v49 ; 3E066313 > v_mac_f32_e32 v4, v19, v31 ; 3E083F13 > v_mac_f32_e32 v14, v19, v0 ; 3E1C0113 > v_mul_f32_e32 v18, v5, v45 ; 10245B05 > v_mul_f32_e32 v19, v5, v29 ; 10263B05 > v_mul_f32_e32 v5, v5, v30 ; 100A3D05 > v_mac_f32_e32 v18, v6, v49 ; 3E246306 > v_mac_f32_e32 v19, v6, v31 ; 3E263F06 > v_mac_f32_e32 v5, v6, v0 ; 3E0A0106 > v_mac_f32_e32 v18, v7, v43 ; 3E245707 > v_mac_f32_e32 v19, v7, v8 ; 3E261107 > v_mac_f32_e32 v5, v7, v11 ; 3E0A1707 > v_mul_f32_e32 v7, v15, v29 ; 100E3B0F > v_mac_f32_e32 v7, v16, v31 ; 3E0E3F10 > v_mac_f32_e32 v14, v20, v11 ; 3E1C1714 > v_add_f32_e32 v53, v10, v53 ; 066A6B0A > v_mul_f32_e32 v6, v15, v45 ; 100C5B0F > v_mac_f32_e32 v3, v20, v43 ; 3E065714 > v_mac_f32_e32 v4, v20, v8 ; 3E081114 > v_mac_f32_e32 v7, v17, v8 ; 3E0E1111 > v_mul_f32_e32 v8, v15, v30 ; 10103D0F > v_mul_f32_e32 v15, v14, v14 ; 101E1D0E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v13, s9, v53 ; 101A6A09 > v_mac_f32_e32 v15, v3, v3 ; 3E1E0703 > v_mac_f32_e32 v13, s8, v1 ; 3E1A0208 > v_mac_f32_e32 v15, v4, v4 ; 3E1E0904 > v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F > v_mac_f32_e32 v13, s10, v9 ; 3E1A120A > v_mac_f32_e32 v8, v16, v0 ; 3E100110 > v_add_f32_e32 v0, s11, v13 ; 06001A0B > v_mul_f32_e32 v13, s17, v53 ; 101A6A11 > v_mul_f32_e32 v54, s5, v53 ; 106C6A05 > v_mac_f32_e32 v13, s16, v1 ; 3E1A0210 > v_mac_f32_e32 v8, v17, v11 ; 3E101711 > v_mul_f32_e32 v11, s13, v53 ; 10166A0D > v_mac_f32_e32 v54, s4, v1 ; 3E6C0204 > v_mac_f32_e32 v6, v16, v49 ; 3E0C6310 > v_mac_f32_e32 v11, s12, v1 ; 3E16020C > v_mac_f32_e32 v13, s18, v9 ; 3E1A1212 > v_mac_f32_e32 v11, s14, v9 ; 3E16120E > v_mac_f32_e32 v54, s6, v9 ; 3E6C1206 > v_add_f32_e32 v1, s19, v13 ; 06021A13 > v_mul_f32_e32 v13, v5, v5 ; 101A0B05 > v_mul_f32_e32 v9, v14, v15 ; 10121F0E > v_mac_f32_e32 v6, v17, v43 ; 3E0C5711 > v_mul_f32_e32 v14, v8, v8 ; 101C1108 > v_mac_f32_e32 v13, v18, v18 ; 3E1A2512 > v_mac_f32_e32 v14, v6, v6 ; 3E1C0D06 > v_mac_f32_e32 v13, v19, v19 ; 3E1A2713 > v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D > v_mul_f32_e32 v3, v3, v15 ; 10061F03 > v_mul_f32_e32 v4, v4, v15 ; 10081F04 > v_mul_f32_e32 v8, v8, v14 ; 10101D08 > v_mul_f32_e32 v5, v5, v13 ; 100A1B05 > v_mul_f32_e32 v15, s20, v9 ; 101E1214 > v_mul_f32_e32 v18, v18, v13 ; 10241B12 > v_mul_f32_e32 v16, s20, v5 ; 10200A14 > v_mul_f32_e32 v17, s20, v8 ; 10221014 > v_mul_f32_e32 v6, v6, v14 ; 100C1D06 > v_mul_f32_e32 v7, v7, v14 ; 100E1D07 > v_mul_f32_e32 v14, s23, v9 ; 101C1217 > v_mul_f32_e32 v9, s26, v9 ; 1012121A > v_mac_f32_e32 v15, s21, v3 ; 3E1E0615 > v_mac_f32_e32 v17, s21, v6 ; 3E220C15 > v_mac_f32_e32 v14, s24, v3 ; 3E1C0618 > v_mac_f32_e32 v9, s27, v3 ; 3E12061B > v_mul_f32_e32 v3, s23, v5 ; 10060A17 > v_mul_f32_e32 v13, v19, v13 ; 101A1B13 > v_mul_f32_e32 v19, s23, v8 ; 10261017 > v_mac_f32_e32 v16, s21, v18 ; 3E202415 > v_mac_f32_e32 v3, s24, v18 ; 3E062418 > v_mac_f32_e32 v19, s24, v6 ; 3E260C18 > v_mul_f32_e32 v5, s26, v5 ; 100A0A1A > v_mul_f32_e32 v8, s26, v8 ; 1010101A > v_mac_f32_e32 v15, s22, v4 ; 3E1E0816 > v_mac_f32_e32 v16, s22, v13 ; 
3E201A16 > v_mac_f32_e32 v17, s22, v7 ; 3E220E16 > v_mac_f32_e32 v8, s27, v6 ; 3E100C1B > v_mac_f32_e32 v5, s27, v18 ; 3E0A241B > v_mac_f32_e32 v14, s25, v4 ; 3E1C0819 > v_mac_f32_e32 v3, s25, v13 ; 3E061A19 > v_mac_f32_e32 v19, s25, v7 ; 3E260E19 > exp 15, 33, 0, 0, 0, v15, v16, v17, v0 ; F800021F 0011100F > v_mul_f32_e32 v6, v22, v24 ; 100C3116 > v_mac_f32_e32 v9, s28, v4 ; 3E12081C > v_mul_f32_e32 v4, v21, v24 ; 10083115 > v_mac_f32_e32 v5, s28, v13 ; 3E0A1A1C > v_mac_f32_e32 v8, s28, v7 ; 3E100E1C > v_mul_f32_e32 v7, v23, v24 ; 100E3117 > exp 15, 34, 0, 0, 0, v14, v3, v19, v10 ; F800022F 0A13030E > v_mul_f32_e32 v4, s29, v4 ; 1008081D > v_mul_f32_e32 v6, s30, v6 ; 100C0C1E > v_mul_f32_e32 v7, s31, v7 ; 100E0E1F > v_mul_f32_e32 v13, s0, v24 ; 101A3000 > exp 15, 35, 0, 0, 0, v9, v5, v8, v0 ; F800023F 00080509 > v_add_f32_e32 v12, s7, v54 ; 06186C07 > v_add_f32_e32 v11, s15, v11 ; 0616160F > exp 15, 36, 0, 0, 0, v4, v6, v7, v13 ; F800024F 0D070604 > exp 15, 12, 0, 1, 0, v12, v0, v11, v1 ; F80008CF 010B000C > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 56 >Code Size: 1652 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** >radeonsi: Compiling shader 382 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > %22 = call i32 @llvm.SI.packf16(float %6, float %7) > %23 = bitcast i32 %22 to float > %24 = call i32 @llvm.SI.packf16(float %8, float %9) > %25 = bitcast i32 %24 to float > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %23, float %25, float undef, float undef) > %26 = call i32 @llvm.SI.packf16(float %10, float %11) > %27 = bitcast i32 %26 to float > %28 = call i32 @llvm.SI.packf16(float %12, float %13) > %29 = bitcast i32 %28 to float > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %27, float %29, float undef, float undef) > %30 = call i32 @llvm.SI.packf16(float %14, float %15) > %31 = bitcast i32 %30 to float > %32 = call i32 @llvm.SI.packf16(float %16, float %17) > %33 = bitcast i32 %32 to float > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 1, float %31, float %33, float undef, float undef) > %34 = call i32 @llvm.SI.packf16(float %18, float %19) > %35 = bitcast i32 %34 to float > %36 = call i32 @llvm.SI.packf16(float %20, float %21) > %37 = bitcast i32 %36 to float > call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %35, float %37, float undef, float undef) > ret void >} > >; Function Attrs: nounwind readnone >declare i32 @llvm.SI.packf16(float, float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } >attributes #1 = { nounwind readnone } > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v12, v2, 0, 0, [m0] ; C8300002 > v_mov_b32_e32 v16, v13 ; 7E20030D > v_interp_p2_f32 v12, [v12], v3, 0, 0, [m0] ; C8310003 > v_interp_p1_f32 v13, v2, 1, 0, [m0] ; C8340102 > v_interp_p2_f32 v13, [v13], v3, 1, 0, [m0] ; C8350103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; 
C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v14, v2, 1, 2, [m0] ; C8380902 > v_interp_p2_f32 v14, [v14], v3, 1, 2, [m0] ; C8390903 > v_interp_p1_f32 v15, v2, 2, 2, [m0] ; C83C0A02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[0:3], 0x59 ; C2038159 > v_interp_p2_f32 v15, [v15], v3, 2, 2, [m0] ; C83D0A03 > v_interp_p1_f32 v17, v2, 0, 3, [m0] ; C8440C02 > v_interp_p2_f32 v17, [v17], v3, 0, 3, [m0] ; C8450C03 > v_interp_p1_f32 v18, v2, 1, 3, [m0] ; C8480D02 > s_buffer_load_dword s6, s[0:3], 0x58 ; C2030158 > v_interp_p2_f32 v18, [v18], v3, 1, 3, [m0] ; C8490D03 > v_interp_p1_f32 v19, v2, 2, 3, [m0] ; C84C0E02 > v_interp_p2_f32 v19, [v19], v3, 2, 3, [m0] ; C84D0E03 > v_interp_p1_f32 v0, v2, 3, 4, [m0] ; C8001302 > v_interp_p2_f32 v0, [v0], v3, 3, 4, [m0] ; C8011303 > v_sub_f32_e32 v2, 1.0, v0 ; 080400F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e64 v1, 1.0, s7 ; D2060001 00000EF2 > v_fma_f32 v1, s6, v1, v2 ; D2960001 040A0206 > v_rcp_f32_e32 v2, s7 ; 7E045407 > v_cmp_neq_f32_e64 vcc, 0, s7 ; D01A006A 00000E80 > v_bfrev_b32_e32 v3, 14 ; 7E06708E > v_sub_f32_e32 v0, 0x3f7eb852, v0 ; 080000FF 3F7EB852 > v_cndmask_b32_e32 v2, v3, v2 ; 00040503 > v_mad_f32 v1, v1, v2, -v2 ; D2820001 840A0501 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mov_b32_e32 v2, 0x40400000 ; 7E0402FF 40400000 > v_ceil_f32_e32 v0, v0 ; 7E004500 > v_fma_f32 v2, -2.0, v1, v2 ; D2960002 040A02F5 > v_mul_f32_e32 v1, v1, v1 ; 10020301 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_fma_f32 v2, -v0, v1, 1.0 ; D2960002 23CA0300 > s_buffer_load_dword s22, s[0:3], 0x60 ; C20B0160 > v_log_f32_e32 v2, v2 ; 7E044F02 > s_buffer_load_dword s21, s[0:3], 0x5f ; C20A815F > s_buffer_load_dword s8, s[0:3], 0x5c ; C204015C > s_buffer_load_dword s9, s[0:3], 0x5d ; C204815D > s_buffer_load_dword s20, s[0:3], 0x5e ; C20A015E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > s_load_dwordx8 s[44:51], s[4:5], 0x20 ; C0D60520 > s_load_dwordx8 s[32:39], s[4:5], 0x10 ; C0D00510 > s_load_dwordx4 s[40:43], s[4:5], 0x1c ; C094051C > s_buffer_load_dword s23, s[0:3], 0x61 ; C20B8161 > s_buffer_load_dword s0, s[0:3], 0x62 ; C2000162 > v_mov_b32_e32 v3, 0xbec0c0c1 ; 7E0602FF BEC0C0C1 > v_fma_f32 v1, v1, v0, v3 ; D2960001 040E0101 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v0, s22, v2 ; 10000416 > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_mul_f32_e32 v2, s21, v0 ; 10040015 > v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 > v_mul_f32_e32 v0, s8, v2 ; 10000408 > v_mul_f32_e32 v1, s9, v2 ; 10020409 > v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680 > v_mul_f32_e32 v2, s20, v2 ; 10040414 > s_and_b32 s28, s28, s19 ; 871C131C > s_and_b32 s40, s40, s39 ; 87282728 > s_and_b32 s24, s24, s51 ; 87183318 > v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 > image_sample v[20:21], v[12:13], s[12:19], s[28:31] dmask:0xa ; F0800A00 00E3140C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v3, v20, 2.0, -1.0 ; D2960003 03CDE914 > v_fma_f32 v20, v21, 2.0, -1.0 ; D2960014 03CDE915 > v_fma_f32 v21, -v3, v3, 1.0 ; D2960015 23CA0703 > v_fma_f32 v21, -v20, v20, v21 ; D2960015 24562914 > v_mul_f32_e32 v4, v3, v4 ; 10080903 > v_mac_f32_e32 v4, 
v20, v5 ; 3E080B14 > v_mul_f32_e32 v5, v3, v7 ; 100A0F03 > v_sqrt_f32_e32 v21, v21 ; 7E2A6715 > v_mac_f32_e32 v4, v21, v6 ; 3E080D15 > v_mac_f32_e32 v5, v20, v14 ; 3E0A1D14 > v_mul_f32_e32 v3, v3, v17 ; 10062303 > v_mac_f32_e32 v3, v20, v18 ; 3E062514 > v_mac_f32_e32 v5, v21, v15 ; 3E0A1F15 > v_mul_f32_e32 v6, v4, v4 ; 100C0904 > v_mac_f32_e32 v3, v21, v19 ; 3E062715 > v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 > v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 > v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 > image_sample v[8:11], v[12:13], s[32:39], s[40:43] dmask:0xf ; F0800F00 0148080C > image_sample v[17:20], v[12:13], s[44:51], s[24:27] dmask:0xf ; F0800F00 00CB110C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v12, s23, v19 ; 10182617 > v_mul_f32_e32 v4, v4, v6 ; 10080D04 > v_mul_f32_e32 v5, v5, v6 ; 100A0D05 > v_mul_f32_e32 v3, v3, v6 ; 10060D03 > v_fma_f32 v6, v3, 0.5, 0.5 ; D2960006 03C1E103 > v_fma_f32 v4, v4, 0.5, 0.5 ; D2960004 03C1E104 > v_fma_f32 v5, v5, 0.5, 0.5 ; D2960005 03C1E105 > v_mov_b32_e32 v3, 0 ; 7E060280 > v_mov_b32_e32 v7, s0 ; 7E0E0200 > v_mov_b32_e32 v13, v18 ; 7E1A0312 > v_mov_b32_e32 v14, v17 ; 7E1C0311 > v_mov_b32_e32 v15, v20 ; 7E1E0314 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 > v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A > exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C > v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E > exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 24 >Code Size: 624 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 > v_mov_b32_e32 v10, v4 ; 7E140304 > v_mov_b32_e32 v11, v4 ; 7E160304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_load_dwordx4 s[12:15], s[10:11], 0x14 ; C0860B14 > s_load_dwordx4 s[16:19], s[10:11], 0x18 ; C0880B18 > s_load_dwordx4 s[8:11], s[10:11], 0x1c ; C0840B1C > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[18:21], v7, s[20:23], 0 idxen ; E00C2000 80051207 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 > s_waitcnt vmcnt(1) ; BF8C0F71 > 
buffer_load_format_xyzw v[21:24], v9, s[12:15], 0 idxen ; E00C2000 80031509 > buffer_load_format_xyzw v[25:28], v10, s[16:19], 0 idxen ; E00C2000 8004190A > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[8:11], v11, s[8:11], 0 idxen ; E00C2000 8002080B > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C > s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 > s_buffer_load_dword s26, s[0:3], 0x24 ; C20D0124 > s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D > s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 > s_buffer_load_dword s27, s[0:3], 0x25 ; C20D8125 > s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E > s_buffer_load_dword s25, s[0:3], 0x22 ; C20C8122 > s_buffer_load_dword s28, s[0:3], 0x26 ; C20E0126 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s29, s[0:3], 0x28 ; C20E8128 > s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 > s_buffer_load_dword s31, s[0:3], 0x2a ; C20F812A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, v0, v9 ; 10121300 > v_mul_f32_e32 v1, v0, v10 ; 10021500 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_cvt_i32_f32_e32 v8, v9 ; 7E101109 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v8, 5, v8 ; 34101085 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_or_b32_e32 v10, 4, v8 ; 38141084 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v9, 4, v1 ; 38120284 > buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A > v_or_b32_e32 v11, 16, v8 ; 38161090 > v_or_b32_e32 v28, 28, v8 ; 3838109C > v_or_b32_e32 v29, 20, v8 ; 383A1094 > v_or_b32_e32 v30, 4, v0 ; 383C0084 > v_or_b32_e32 v31, 24, v8 ; 383E1098 > buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B > buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E > buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F > v_or_b32_e32 v33, 28, v1 ; 3842029C > v_or_b32_e32 v34, 24, v1 ; 38440298 > v_or_b32_e32 v32, 16, v1 ; 38400290 > v_or_b32_e32 v35, 20, v1 ; 38460294 > buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 > buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 > buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 > buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 > v_or_b32_e32 v37, 28, v0 ; 384A009C > v_or_b32_e32 v38, 24, v0 ; 384C0098 > v_or_b32_e32 v36, 16, v0 ; 38480090 > v_or_b32_e32 v39, 20, v0 ; 384E0094 > buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 > buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 > buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 > buffer_load_dword v39, v39, s[4:7], 0 offen ; E0301000 80012727 > 
v_or_b32_e32 v41, 8, v8 ; 38521088 > buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 > v_or_b32_e32 v40, 8, v1 ; 38500288 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > v_or_b32_e32 v42, 8, v0 ; 38540088 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > buffer_load_dword v40, v40, s[4:7], 0 offen ; E0301000 80012828 > buffer_load_dword v41, v41, s[4:7], 0 offen ; E0301000 80012929 > buffer_load_dword v42, v42, s[4:7], 0 offen ; E0301000 80012A2A > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s0, s[0:3], 0x2b ; C200012B > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v10, v10, v26 ; 1014350A > v_mul_f32_e32 v48, v11, v11 ; 1060170B > v_mac_f32_e32 v10, v9, v25 ; 3E143309 > v_mul_f32_e32 v9, v28, v11 ; 1012171C > v_mac_f32_e32 v10, v30, v27 ; 3E14371E > s_waitcnt vmcnt(14) ; BF8C0F7E > v_fma_f32 v30, v29, v31, -v9 ; D296001E 84263F1D > v_mul_f32_e32 v43, v26, v30 ; 10563D1A > v_mac_f32_e32 v43, v26, v30 ; 3E563D1A > v_mul_f32_e32 v30, v28, v31 ; 103C3F1C > v_fma_f32 v44, v11, v29, v30 ; D296002C 047A3B0B > v_mul_f32_e32 v45, v26, v44 ; 105A591A > v_mac_f32_e32 v45, v26, v44 ; 3E5A591A > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mul_f32_e32 v44, v33, v34 ; 10584521 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_fma_f32 v46, v32, v35, v44 ; D296002E 04B24720 > v_mul_f32_e32 v46, v25, v46 ; 105C5D19 > v_mad_f32 v49, v31, v31, v48 ; D2820031 04C23F1F > v_mac_f32_e32 v45, 2.0, v46 ; 3E5A5CF4 > v_mul_f32_e32 v46, v32, v32 ; 105C4120 > v_mad_f32 v47, v34, v34, v46 ; D282002F 04BA4522 > v_fma_f32 v49, -v49, 2.0, 1.0 ; D2960031 23C9E931 > v_fma_f32 v47, -v47, 2.0, 1.0 ; D296002F 23C9E92F > v_mul_f32_e32 v49, v49, v26 ; 10623531 > v_mac_f32_e32 v49, v47, v25 ; 3E62332F > v_mul_f32_e32 v47, v33, v32 ; 105E4121 > v_fma_f32 v50, v35, v34, -v47 ; D2960032 84BE4523 > v_mul_f32_e32 v50, v25, v50 ; 10646519 > v_mac_f32_e32 v43, 2.0, v50 ; 3E5664F4 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v50, v37, v38 ; 10644D25 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_fma_f32 v51, v36, v39, v50 ; D2960033 04CA4F24 > v_mul_f32_e32 v51, v27, v51 ; 1066671B > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mul_f32_e32 v8, v8, v26 ; 10103508 > v_mac_f32_e32 v45, 2.0, v51 ; 3E5A66F4 > v_mul_f32_e32 v51, v36, v36 ; 10664924 > v_mad_f32 v52, v38, v38, v51 ; D2820034 04CE4D26 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v8, v1, v25 ; 3E103301 > v_mul_f32_e32 v1, v33, v35 ; 10024721 > v_fma_f32 v52, -v52, 2.0, 1.0 ; D2960034 23C9E934 > v_fma_f32 v33, v32, v35, -v44 ; D2960021 84B24720 > v_fma_f32 v44, v32, v34, -v1 ; D296002C 84064520 > v_mac_f32_e32 v1, v32, v34 ; 3E024520 > v_mul_f32_e32 v32, v37, v39 ; 10404F25 > v_mul_f32_e32 v28, v28, v29 ; 10383B1C > v_fma_f32 v30, v11, v29, -v30 ; D296001E 847A3B0B > v_mac_f32_e32 v49, v52, v27 ; 3E623734 > v_mul_f32_e32 v52, v37, v36 ; 10684925 > v_fma_f32 v37, v36, v39, -v50 ; D2960025 84CA4F24 > v_fma_f32 v50, v36, v38, -v32 ; D2960032 84824D24 > v_mac_f32_e32 v32, v36, v38 ; 3E404D24 > v_fma_f32 v36, v11, v31, -v28 ; D2960024 84723F0B > v_mac_f32_e32 v28, v11, v31 ; 3E383F0B > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v8, v0, v27 ; 3E103700 > v_mul_f32_e32 v0, v26, v30 ; 10003D1A > v_mul_f32_e32 v11, v26, v28 ; 1016391A > v_mac_f32_e32 v0, v26, v30 ; 3E003D1A > v_mul_f32_e32 v30, v29, v29 ; 103C3B1D > v_mac_f32_e32 v30, v31, v31 ; 3E3C3F1F > v_mac_f32_e32 v11, v26, v28 ; 3E16391A > v_mul_f32_e32 v28, 
v35, v35 ; 10384723 > v_mul_f32_e32 v1, v25, v1 ; 10020319 > v_mac_f32_e32 v28, v34, v34 ; 3E384522 > v_fma_f32 v30, -v30, 2.0, 1.0 ; D296001E 23C9E91E > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v39, v39 ; 10024F27 > v_mac_f32_e32 v1, v38, v38 ; 3E024D26 > v_fma_f32 v28, -v28, 2.0, 1.0 ; D296001C 23C9E91C > v_mul_f32_e32 v30, v30, v26 ; 103C351E > v_mac_f32_e32 v30, v28, v25 ; 3E3C331C > v_mul_f32_e32 v28, v25, v33 ; 10384319 > v_fma_f32 v1, -v1, 2.0, 1.0 ; D2960001 23C9E901 > v_mac_f32_e32 v0, 2.0, v28 ; 3E0038F4 > v_mac_f32_e32 v30, v1, v27 ; 3E3C3701 > v_mul_f32_e32 v1, v27, v37 ; 10024B1B > v_mac_f32_e32 v0, 2.0, v1 ; 3E0002F4 > v_mul_f32_e32 v1, v27, v32 ; 1002411B > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v13, v0 ; 1002010D > v_mac_f32_e32 v1, v12, v30 ; 3E023D0C > v_mac_f32_e32 v1, v14, v11 ; 3E02170E > v_add_f32_e32 v1, v8, v1 ; 06020308 > v_fma_f32 v8, v35, v34, v47 ; D2960008 04BE4523 > v_fma_f32 v9, v29, v31, v9 ; D2960009 04263F1D > v_mul_f32_e32 v31, v25, v8 ; 103E1119 > v_mac_f32_e32 v46, v35, v35 ; 3E5C4723 > v_mac_f32_e32 v48, v29, v29 ; 3E603B1D > v_mul_f32_e32 v29, v25, v44 ; 103A5919 > v_mac_f32_e32 v31, v25, v8 ; 3E3E1119 > v_fma_f32 v8, -v46, 2.0, 1.0 ; D2960008 23C9E92E > v_fma_f32 v28, v39, v38, v52 ; D296001C 04D24D27 > v_mul_f32_e32 v9, v26, v9 ; 1012131A > v_mac_f32_e32 v51, v39, v39 ; 3E664F27 > v_mac_f32_e32 v29, v25, v44 ; 3E3A5919 > v_mul_f32_e32 v8, v8, v25 ; 10103308 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v25, v40, v25 ; 10323328 > v_fma_f32 v32, -v48, 2.0, 1.0 ; D2960020 23C9E930 > v_fma_f32 v53, v39, v38, -v52 ; D2960035 84D24D27 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v25, v41, v26 ; 3E323529 > v_mac_f32_e32 v8, v32, v26 ; 3E103520 > v_fma_f32 v32, -v51, 2.0, 1.0 ; D2960020 23C9E933 > v_mul_f32_e32 v26, v26, v36 ; 1034491A > v_mul_f32_e32 v28, v27, v28 ; 1038391B > v_mac_f32_e32 v31, 2.0, v9 ; 3E3E12F4 > v_mul_f32_e32 v53, v27, v53 ; 106A6B1B > v_mac_f32_e32 v31, 2.0, v28 ; 3E3E38F4 > v_mac_f32_e32 v8, v32, v27 ; 3E103720 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v25, v42, v27 ; 3E32372A > v_mul_f32_e32 v27, v27, v50 ; 1036651B > v_mac_f32_e32 v29, 2.0, v26 ; 3E3A34F4 > v_mac_f32_e32 v29, 2.0, v27 ; 3E3A36F4 > v_mul_f32_e32 v9, v13, v31 ; 10123F0D > v_mac_f32_e32 v9, v12, v29 ; 3E123B0C > v_mac_f32_e32 v9, v14, v8 ; 3E12110E > v_mac_f32_e32 v43, 2.0, v53 ; 3E566AF4 > v_mul_f32_e32 v53, v13, v49 ; 106A630D > v_mac_f32_e32 v53, v12, v45 ; 3E6A5B0C > v_add_f32_e32 v9, v25, v9 ; 06121319 > v_mov_b32_e32 v25, 1.0 ; 7E3202F2 > v_mac_f32_e32 v53, v14, v43 ; 3E6A570E > exp 15, 32, 0, 0, 0, v3, v4, v14, v25 ; F800020F 190E0403 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v3, v18, v45 ; 10065B12 > v_mul_f32_e32 v4, v18, v29 ; 10083B12 > v_mul_f32_e32 v14, v18, v30 ; 101C3D12 > v_mac_f32_e32 v3, v19, v49 ; 3E066313 > v_mac_f32_e32 v4, v19, v31 ; 3E083F13 > v_mac_f32_e32 v14, v19, v0 ; 3E1C0113 > v_mul_f32_e32 v18, v5, v45 ; 10245B05 > v_mul_f32_e32 v19, v5, v29 ; 10263B05 > v_mul_f32_e32 v5, v5, v30 ; 100A3D05 > v_mac_f32_e32 v18, v6, v49 ; 3E246306 > v_mac_f32_e32 v19, v6, v31 ; 3E263F06 > v_mac_f32_e32 v5, v6, v0 ; 3E0A0106 > v_mac_f32_e32 v18, v7, v43 ; 3E245707 > v_mac_f32_e32 v19, v7, v8 ; 3E261107 > v_mac_f32_e32 v5, v7, v11 ; 3E0A1707 > v_mul_f32_e32 v7, v15, v29 ; 100E3B0F > v_mac_f32_e32 v7, v16, v31 ; 3E0E3F10 > v_mac_f32_e32 v14, v20, v11 ; 3E1C1714 > v_add_f32_e32 v53, v10, v53 ; 066A6B0A > v_mul_f32_e32 v6, v15, v45 ; 100C5B0F > v_mac_f32_e32 v3, v20, v43 ; 3E065714 > v_mac_f32_e32 v4, v20, 
v8 ; 3E081114 > v_mac_f32_e32 v7, v17, v8 ; 3E0E1111 > v_mul_f32_e32 v8, v15, v30 ; 10103D0F > v_mul_f32_e32 v15, v14, v14 ; 101E1D0E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v13, s9, v53 ; 101A6A09 > v_mac_f32_e32 v15, v3, v3 ; 3E1E0703 > v_mac_f32_e32 v13, s8, v1 ; 3E1A0208 > v_mac_f32_e32 v15, v4, v4 ; 3E1E0904 > v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F > v_mac_f32_e32 v13, s10, v9 ; 3E1A120A > v_mac_f32_e32 v8, v16, v0 ; 3E100110 > v_add_f32_e32 v0, s11, v13 ; 06001A0B > v_mul_f32_e32 v13, s17, v53 ; 101A6A11 > v_mul_f32_e32 v54, s5, v53 ; 106C6A05 > v_mac_f32_e32 v13, s16, v1 ; 3E1A0210 > v_mac_f32_e32 v8, v17, v11 ; 3E101711 > v_mul_f32_e32 v11, s13, v53 ; 10166A0D > v_mac_f32_e32 v54, s4, v1 ; 3E6C0204 > v_mac_f32_e32 v6, v16, v49 ; 3E0C6310 > v_mac_f32_e32 v11, s12, v1 ; 3E16020C > v_mac_f32_e32 v13, s18, v9 ; 3E1A1212 > v_mac_f32_e32 v11, s14, v9 ; 3E16120E > v_mac_f32_e32 v54, s6, v9 ; 3E6C1206 > v_add_f32_e32 v1, s19, v13 ; 06021A13 > v_mul_f32_e32 v13, v5, v5 ; 101A0B05 > v_mul_f32_e32 v9, v14, v15 ; 10121F0E > v_mac_f32_e32 v6, v17, v43 ; 3E0C5711 > v_mul_f32_e32 v14, v8, v8 ; 101C1108 > v_mac_f32_e32 v13, v18, v18 ; 3E1A2512 > v_mac_f32_e32 v14, v6, v6 ; 3E1C0D06 > v_mac_f32_e32 v13, v19, v19 ; 3E1A2713 > v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D > v_mul_f32_e32 v3, v3, v15 ; 10061F03 > v_mul_f32_e32 v4, v4, v15 ; 10081F04 > v_mul_f32_e32 v8, v8, v14 ; 10101D08 > v_mul_f32_e32 v5, v5, v13 ; 100A1B05 > v_mul_f32_e32 v15, s20, v9 ; 101E1214 > v_mul_f32_e32 v18, v18, v13 ; 10241B12 > v_mul_f32_e32 v16, s20, v5 ; 10200A14 > v_mul_f32_e32 v17, s20, v8 ; 10221014 > v_mul_f32_e32 v6, v6, v14 ; 100C1D06 > v_mul_f32_e32 v7, v7, v14 ; 100E1D07 > v_mul_f32_e32 v14, s23, v9 ; 101C1217 > v_mul_f32_e32 v9, s26, v9 ; 1012121A > v_mac_f32_e32 v15, s21, v3 ; 3E1E0615 > v_mac_f32_e32 v17, s21, v6 ; 3E220C15 > v_mac_f32_e32 v14, s24, v3 ; 3E1C0618 > v_mac_f32_e32 v9, s27, v3 ; 3E12061B > v_mul_f32_e32 v3, s23, v5 ; 10060A17 > v_mul_f32_e32 v13, v19, v13 ; 101A1B13 > v_mul_f32_e32 v19, s23, v8 ; 10261017 > v_mac_f32_e32 v16, s21, v18 ; 3E202415 > v_mac_f32_e32 v3, s24, v18 ; 3E062418 > v_mac_f32_e32 v19, s24, v6 ; 3E260C18 > v_mul_f32_e32 v5, s26, v5 ; 100A0A1A > v_mul_f32_e32 v8, s26, v8 ; 1010101A > v_mac_f32_e32 v15, s22, v4 ; 3E1E0816 > v_mac_f32_e32 v16, s22, v13 ; 3E201A16 > v_mac_f32_e32 v17, s22, v7 ; 3E220E16 > v_mac_f32_e32 v8, s27, v6 ; 3E100C1B > v_mac_f32_e32 v5, s27, v18 ; 3E0A241B > v_mac_f32_e32 v14, s25, v4 ; 3E1C0819 > v_mac_f32_e32 v3, s25, v13 ; 3E061A19 > v_mac_f32_e32 v19, s25, v7 ; 3E260E19 > exp 15, 33, 0, 0, 0, v15, v16, v17, v0 ; F800021F 0011100F > v_mul_f32_e32 v6, v22, v24 ; 100C3116 > v_mac_f32_e32 v9, s28, v4 ; 3E12081C > v_mul_f32_e32 v4, v21, v24 ; 10083115 > v_mac_f32_e32 v5, s28, v13 ; 3E0A1A1C > v_mac_f32_e32 v8, s28, v7 ; 3E100E1C > v_mul_f32_e32 v7, v23, v24 ; 100E3117 > exp 15, 34, 0, 0, 0, v14, v3, v19, v10 ; F800022F 0A13030E > v_mul_f32_e32 v4, s29, v4 ; 1008081D > v_mul_f32_e32 v6, s30, v6 ; 100C0C1E > v_mul_f32_e32 v7, s31, v7 ; 100E0E1F > v_mul_f32_e32 v13, s0, v24 ; 101A3000 > exp 15, 35, 0, 0, 0, v9, v5, v8, v0 ; F800023F 00080509 > v_add_f32_e32 v12, s7, v54 ; 06186C07 > v_add_f32_e32 v11, s15, v11 ; 0616160F > exp 15, 36, 0, 0, 0, v4, v6, v7, v13 ; F800024F 0D070604 > exp 15, 12, 0, 1, 0, v12, v0, v11, v1 ; F80008CF 010B000C > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 56 >Code 
Size: 1652 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v12, v2, 0, 0, [m0] ; C8300002 > v_mov_b32_e32 v16, v13 ; 7E20030D > v_interp_p2_f32 v12, [v12], v3, 0, 0, [m0] ; C8310003 > v_interp_p1_f32 v13, v2, 1, 0, [m0] ; C8340102 > v_interp_p2_f32 v13, [v13], v3, 1, 0, [m0] ; C8350103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v14, v2, 1, 2, [m0] ; C8380902 > v_interp_p2_f32 v14, [v14], v3, 1, 2, [m0] ; C8390903 > v_interp_p1_f32 v15, v2, 2, 2, [m0] ; C83C0A02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[0:3], 0x59 ; C2038159 > v_interp_p2_f32 v15, [v15], v3, 2, 2, [m0] ; C83D0A03 > v_interp_p1_f32 v17, v2, 0, 3, [m0] ; C8440C02 > v_interp_p2_f32 v17, [v17], v3, 0, 3, [m0] ; C8450C03 > v_interp_p1_f32 v18, v2, 1, 3, [m0] ; C8480D02 > s_buffer_load_dword s6, s[0:3], 0x58 ; C2030158 > v_interp_p2_f32 v18, [v18], v3, 1, 3, [m0] ; C8490D03 > v_interp_p1_f32 v19, v2, 2, 3, [m0] ; C84C0E02 > v_interp_p2_f32 v19, [v19], v3, 2, 3, [m0] ; C84D0E03 > v_interp_p1_f32 v0, v2, 3, 4, [m0] ; C8001302 > v_interp_p2_f32 v0, [v0], v3, 3, 4, [m0] ; C8011303 > v_sub_f32_e32 v2, 1.0, v0 ; 080400F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e64 v1, 1.0, s7 ; D2060001 00000EF2 > v_fma_f32 v1, s6, v1, v2 ; D2960001 040A0206 > v_rcp_f32_e32 v2, s7 ; 7E045407 > v_cmp_neq_f32_e64 vcc, 0, s7 ; D01A006A 00000E80 > v_bfrev_b32_e32 v3, 14 ; 7E06708E > v_sub_f32_e32 v0, 0x3f7eb852, v0 ; 080000FF 3F7EB852 > v_cndmask_b32_e32 v2, v3, v2 ; 00040503 > v_mad_f32 v1, v1, v2, -v2 ; D2820001 840A0501 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mov_b32_e32 v2, 0x40400000 ; 7E0402FF 40400000 > v_ceil_f32_e32 v0, v0 ; 7E004500 > v_fma_f32 v2, -2.0, v1, v2 ; D2960002 040A02F5 > v_mul_f32_e32 v1, v1, v1 ; 10020301 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_fma_f32 v2, -v0, v1, 1.0 ; D2960002 23CA0300 > s_buffer_load_dword s22, s[0:3], 0x60 ; C20B0160 > v_log_f32_e32 v2, v2 ; 7E044F02 > s_buffer_load_dword s21, s[0:3], 0x5f ; C20A815F > s_buffer_load_dword s8, s[0:3], 0x5c ; C204015C > s_buffer_load_dword s9, s[0:3], 0x5d ; C204815D > s_buffer_load_dword s20, s[0:3], 0x5e ; C20A015E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > s_load_dwordx8 s[44:51], s[4:5], 0x20 ; C0D60520 > s_load_dwordx8 s[32:39], s[4:5], 0x10 ; C0D00510 > s_load_dwordx4 s[40:43], s[4:5], 0x1c ; C094051C > s_buffer_load_dword s23, s[0:3], 0x61 ; C20B8161 > s_buffer_load_dword s0, s[0:3], 0x62 ; C2000162 > v_mov_b32_e32 v3, 0xbec0c0c1 ; 7E0602FF BEC0C0C1 > v_fma_f32 v1, v1, v0, v3 ; D2960001 040E0101 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v0, s22, v2 ; 10000416 > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_mul_f32_e32 v2, s21, v0 ; 10040015 > v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 > v_mul_f32_e32 v0, s8, v2 ; 10000408 > v_mul_f32_e32 v1, s9, v2 ; 10020409 > v_cndmask_b32_e64 v3, 0, -1.0, vcc 
; D2000003 01A9E680 > v_mul_f32_e32 v2, s20, v2 ; 10040414 > s_and_b32 s28, s28, s19 ; 871C131C > s_and_b32 s40, s40, s39 ; 87282728 > s_and_b32 s24, s24, s51 ; 87183318 > v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 > image_sample v[20:21], v[12:13], s[12:19], s[28:31] dmask:0xa ; F0800A00 00E3140C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v3, v20, 2.0, -1.0 ; D2960003 03CDE914 > v_fma_f32 v20, v21, 2.0, -1.0 ; D2960014 03CDE915 > v_fma_f32 v21, -v3, v3, 1.0 ; D2960015 23CA0703 > v_fma_f32 v21, -v20, v20, v21 ; D2960015 24562914 > v_mul_f32_e32 v4, v3, v4 ; 10080903 > v_mac_f32_e32 v4, v20, v5 ; 3E080B14 > v_mul_f32_e32 v5, v3, v7 ; 100A0F03 > v_sqrt_f32_e32 v21, v21 ; 7E2A6715 > v_mac_f32_e32 v4, v21, v6 ; 3E080D15 > v_mac_f32_e32 v5, v20, v14 ; 3E0A1D14 > v_mul_f32_e32 v3, v3, v17 ; 10062303 > v_mac_f32_e32 v3, v20, v18 ; 3E062514 > v_mac_f32_e32 v5, v21, v15 ; 3E0A1F15 > v_mul_f32_e32 v6, v4, v4 ; 100C0904 > v_mac_f32_e32 v3, v21, v19 ; 3E062715 > v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 > v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 > v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 > image_sample v[8:11], v[12:13], s[32:39], s[40:43] dmask:0xf ; F0800F00 0148080C > image_sample v[17:19], v[12:13], s[44:51], s[24:27] dmask:0x7 ; F0800700 00CB110C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v12, s23, v19 ; 10182617 > v_mul_f32_e32 v4, v4, v6 ; 10080D04 > v_mul_f32_e32 v5, v5, v6 ; 100A0D05 > v_mul_f32_e32 v3, v3, v6 ; 10060D03 > v_fma_f32 v6, v3, 0.5, 0.5 ; D2960006 03C1E103 > v_fma_f32 v4, v4, 0.5, 0.5 ; D2960004 03C1E104 > v_fma_f32 v5, v5, 0.5, 0.5 ; D2960005 03C1E105 > v_mov_b32_e32 v3, 0 ; 7E060280 > v_mov_b32_e32 v15, 0 ; 7E1E0280 > v_mov_b32_e32 v7, s0 ; 7E0E0200 > v_mov_b32_e32 v13, v18 ; 7E1A0312 > v_mov_b32_e32 v14, v17 ; 7E1C0311 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 > v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A > exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C > v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E > exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 24 >Code Size: 624 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 > v_mov_b32_e32 v10, v4 ; 7E140304 > v_mov_b32_e32 v11, v4 ; 7E160304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_waitcnt vmcnt(0) ; BF8C0F70 
> buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > s_load_dwordx4 s[12:15], s[10:11], 0x18 ; C0860B18 > s_load_dwordx4 s[8:11], s[10:11], 0x1c ; C0840B1C > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[18:21], v7, s[20:23], 0 idxen ; E00C2000 80051207 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[21:24], v10, s[12:15], 0 idxen ; E00C2000 8003150A > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[8:11], v11, s[8:11], 0 idxen ; E00C2000 8002080B > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s14, s[0:3], 0xd ; C207010D > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s15, s[0:3], 0xe ; C207810E > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v10 ; 10021500 > v_mul_f32_e32 v9, v0, v9 ; 10121300 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v9, v9 ; 7E121109 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_lshlrev_b32_e32 v9, 5, v9 ; 34121285 > v_or_b32_e32 v10, 16, v1 ; 38140290 > v_or_b32_e32 v11, 28, v1 ; 3816029C > v_or_b32_e32 v24, 24, v1 ; 38300298 > buffer_load_dword v25, v9, s[4:7], 0 offen ; E0301000 80011909 > v_or_b32_e32 v27, 20, v1 ; 38360294 > buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B > buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A > buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B > buffer_load_dword v24, v24, s[4:7], 0 offen ; E0301000 80011818 > buffer_load_dword v26, v1, s[4:7], 0 offen ; E0301000 80011A01 > v_or_b32_e32 v31, 4, v1 ; 383E0284 > v_or_b32_e32 v8, 16, v9 ; 38101290 > v_or_b32_e32 v28, 28, v9 ; 3838129C > v_or_b32_e32 v29, 24, v9 ; 383A1298 > v_or_b32_e32 v30, 20, v9 ; 383C1294 > v_or_b32_e32 v1, 8, v1 ; 38020288 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v32, 4, v9 ; 38401284 > v_or_b32_e32 v9, 8, v9 ; 38121288 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E > buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > v_or_b32_e32 v38, 8, v0 ; 384C0088 > buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 > buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 > v_or_b32_e32 v33, 16, v0 ; 38420090 > v_or_b32_e32 v34, 28, v0 ; 3844009C > v_or_b32_e32 v35, 24, v0 ; 38460098 > v_or_b32_e32 v36, 20, v0 ; 38480094 > buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 > buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 > buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F > buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 > buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 > buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 > v_or_b32_e32 v37, 4, v0 ; 384A0084 > 
buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v25, v25, v22 ; 10322D19 > v_mul_f32_e32 v43, v27, v27 ; 1056371B > v_mul_f32_e32 v39, v11, v24 ; 104E310B > v_mac_f32_e32 v25, v26, v21 ; 3E322B1A > v_mul_f32_e32 v26, v11, v10 ; 1034150B > v_mul_f32_e32 v11, v11, v27 ; 1016370B > v_fma_f32 v40, v10, v27, v39 ; D2960028 049E370A > v_fma_f32 v39, v10, v27, -v39 ; D2960027 849E370A > v_fma_f32 v41, v10, v24, -v11 ; D2960029 842E310A > v_mac_f32_e32 v11, v10, v24 ; 3E16310A > v_mul_f32_e32 v10, v10, v10 ; 1014150A > v_fma_f32 v42, v27, v24, -v26 ; D296002A 846A311B > v_fma_f32 v26, v27, v24, v26 ; D296001A 046A311B > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v1, v1, v21 ; 10022B01 > v_mac_f32_e32 v43, v24, v24 ; 3E563118 > v_mad_f32 v24, v24, v24, v10 ; D2820018 042A3118 > v_mac_f32_e32 v10, v27, v27 ; 3E14371B > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v27, v28, v8 ; 1036111C > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mul_f32_e32 v44, v28, v29 ; 10583B1C > v_mul_f32_e32 v28, v28, v30 ; 10383D1C > v_mul_f32_e32 v48, v30, v30 ; 10603D1E > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v1, v9, v22 ; 3E022D09 > v_fma_f32 v45, v8, v30, v44 ; D296002D 04B23D08 > v_fma_f32 v46, v8, v29, -v28 ; D296002E 84723B08 > v_fma_f32 v44, v8, v30, -v44 ; D296002C 84B23D08 > v_mac_f32_e32 v28, v8, v29 ; 3E383B08 > v_mul_f32_e32 v8, v8, v8 ; 10101108 > v_fma_f32 v47, v30, v29, -v27 ; D296002F 846E3B1E > v_fma_f32 v27, v30, v29, v27 ; D296001B 046E3B1E > v_mac_f32_e32 v48, v29, v29 ; 3E603B1D > v_mad_f32 v29, v29, v29, v8 ; D282001D 04223B1D > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mac_f32_e32 v1, v38, v23 ; 3E022F26 > v_mul_f32_e32 v38, v22, v28 ; 104C3916 > v_mac_f32_e32 v8, v30, v30 ; 3E103D1E > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mul_f32_e32 v30, v32, v22 ; 103C2D20 > v_mac_f32_e32 v38, v22, v28 ; 3E4C3916 > v_fma_f32 v28, -v29, 2.0, 1.0 ; D296001C 23C9E91D > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v30, v31, v21 ; 3E3C2B1F > v_fma_f32 v10, -v10, 2.0, 1.0 ; D296000A 23C9E90A > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mul_f32_e32 v31, v34, v33 ; 103E4322 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v32, v34, v35 ; 10404722 > v_mul_f32_e32 v34, v34, v36 ; 10444922 > v_fma_f32 v24, -v24, 2.0, 1.0 ; D2960018 23C9E918 > v_mul_f32_e32 v28, v28, v22 ; 10382D1C > v_mul_f32_e32 v52, v36, v36 ; 10684924 > v_fma_f32 v49, v33, v36, v32 ; D2960031 04824921 > v_fma_f32 v50, v33, v35, -v34 ; D2960032 848A4721 > v_mac_f32_e32 v28, v24, v21 ; 3E382B18 > v_fma_f32 v24, -v48, 2.0, 1.0 ; D2960018 23C9E930 > v_fma_f32 v32, v33, v36, -v32 ; D2960020 84824921 > v_mac_f32_e32 v34, v33, v35 ; 3E444721 > v_mul_f32_e32 v33, v33, v33 ; 10424321 > v_fma_f32 v8, -v8, 2.0, 1.0 ; D2960008 23C9E908 > v_mul_f32_e32 v10, v10, v21 ; 10142B0A > v_fma_f32 v51, v36, v35, -v31 ; D2960033 847E4724 > v_fma_f32 v31, v36, v35, v31 ; D296001F 047E4724 > v_mac_f32_e32 v52, v35, v35 ; 3E684723 > v_mad_f32 v35, v35, v35, v33 ; D2820023 04864723 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v30, v37, v23 ; 3E3C2F25 > v_mul_f32_e32 v37, v21, v26 ; 104A3515 > v_mac_f32_e32 v10, v8, v22 ; 3E142D08 > v_fma_f32 v8, -v43, 2.0, 1.0 ; D2960008 23C9E92B > v_mul_f32_e32 v24, v24, v22 ; 10302D18 > v_mul_f32_e32 v9, v22, v45 ; 10125B16 > v_mac_f32_e32 v33, v36, 
v36 ; 3E424924 > v_mul_f32_e32 v36, v21, v41 ; 10485315 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v25, v0, v23 ; 3E322F00 > v_mul_f32_e32 v0, v22, v47 ; 10005F16 > v_mac_f32_e32 v37, v21, v26 ; 3E4A3515 > v_mul_f32_e32 v26, v22, v44 ; 10345916 > v_mac_f32_e32 v24, v8, v21 ; 3E302B08 > v_fma_f32 v8, -v35, 2.0, 1.0 ; D2960008 23C9E923 > v_mac_f32_e32 v28, v8, v23 ; 3E382F08 > v_fma_f32 v8, -v33, 2.0, 1.0 ; D2960008 23C9E921 > v_mul_f32_e32 v27, v22, v27 ; 10363716 > v_mul_f32_e32 v29, v21, v40 ; 103A5115 > v_mac_f32_e32 v9, v22, v45 ; 3E125B16 > v_mac_f32_e32 v0, v22, v47 ; 3E005F16 > v_mac_f32_e32 v26, v22, v44 ; 3E345916 > v_mac_f32_e32 v36, v21, v41 ; 3E485315 > v_mul_f32_e32 v22, v22, v46 ; 102C5D16 > v_mac_f32_e32 v10, v8, v23 ; 3E142F08 > v_fma_f32 v8, -v52, 2.0, 1.0 ; D2960008 23C9E934 > v_mul_f32_e32 v39, v21, v39 ; 104E4F15 > v_mul_f32_e32 v35, v23, v49 ; 10466317 > v_mac_f32_e32 v9, 2.0, v29 ; 3E123AF4 > v_mul_f32_e32 v40, v23, v50 ; 10506517 > v_mac_f32_e32 v36, 2.0, v22 ; 3E482CF4 > v_mac_f32_e32 v24, v8, v23 ; 3E302F08 > v_mul_f32_e32 v8, v21, v42 ; 10105515 > v_mul_f32_e32 v11, v21, v11 ; 10161715 > v_mul_f32_e32 v31, v23, v31 ; 103E3F17 > v_mac_f32_e32 v37, 2.0, v27 ; 3E4A36F4 > v_mul_f32_e32 v21, v23, v32 ; 102A4117 > v_mac_f32_e32 v26, 2.0, v39 ; 3E344EF4 > v_mac_f32_e32 v9, 2.0, v35 ; 3E1246F4 > v_mac_f32_e32 v36, 2.0, v40 ; 3E4850F4 > v_mul_f32_e32 v33, v23, v51 ; 10426717 > v_mac_f32_e32 v0, 2.0, v8 ; 3E0010F4 > v_mul_f32_e32 v8, v18, v9 ; 10101312 > v_mac_f32_e32 v38, 2.0, v11 ; 3E4C16F4 > v_mul_f32_e32 v23, v23, v34 ; 102E4517 > v_mul_f32_e32 v11, v18, v36 ; 10164912 > v_mac_f32_e32 v37, 2.0, v31 ; 3E4A3EF4 > v_mac_f32_e32 v26, 2.0, v21 ; 3E342AF4 > v_mul_f32_e32 v18, v18, v24 ; 10243112 > v_mac_f32_e32 v11, v19, v37 ; 3E164B13 > v_mac_f32_e32 v0, 2.0, v33 ; 3E0042F4 > v_mac_f32_e32 v8, v19, v28 ; 3E103913 > v_mac_f32_e32 v18, v19, v26 ; 3E243513 > v_mac_f32_e32 v38, 2.0, v23 ; 3E4C2EF4 > v_mul_f32_e32 v19, v5, v9 ; 10261305 > v_mac_f32_e32 v8, v20, v0 ; 3E100114 > v_mac_f32_e32 v11, v20, v10 ; 3E161514 > v_mac_f32_e32 v18, v20, v38 ; 3E244D14 > v_mul_f32_e32 v20, v5, v36 ; 10284905 > v_mul_f32_e32 v5, v5, v24 ; 100A3105 > v_mac_f32_e32 v19, v6, v28 ; 3E263906 > v_mac_f32_e32 v20, v6, v37 ; 3E284B06 > v_mac_f32_e32 v5, v6, v26 ; 3E0A3506 > v_mul_f32_e32 v6, v15, v9 ; 100C130F > v_mac_f32_e32 v19, v7, v0 ; 3E260107 > v_mac_f32_e32 v20, v7, v10 ; 3E281507 > v_mac_f32_e32 v5, v7, v38 ; 3E0A4D07 > v_mul_f32_e32 v7, v15, v36 ; 100E490F > v_mul_f32_e32 v15, v15, v24 ; 101E310F > v_mac_f32_e32 v6, v16, v28 ; 3E0C3910 > v_mac_f32_e32 v7, v16, v37 ; 3E0E4B10 > v_mac_f32_e32 v15, v16, v26 ; 3E1E3510 > v_mul_f32_e32 v16, v13, v28 ; 1020390D > v_mac_f32_e32 v16, v12, v9 ; 3E20130C > v_mac_f32_e32 v16, v14, v0 ; 3E20010E > v_mac_f32_e32 v6, v17, v0 ; 3E0C0111 > v_mul_f32_e32 v0, v13, v37 ; 10004B0D > v_mac_f32_e32 v0, v12, v36 ; 3E00490C > v_mac_f32_e32 v0, v14, v10 ; 3E00150E > v_add_f32_e32 v0, v1, v0 ; 06000101 > v_mul_f32_e32 v1, v13, v26 ; 1002350D > v_mac_f32_e32 v1, v12, v24 ; 3E02310C > v_add_f32_e32 v9, v30, v16 ; 0612211E > v_mac_f32_e32 v1, v14, v38 ; 3E024D0E > v_mac_f32_e32 v7, v17, v10 ; 3E0E1511 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v10, s5, v9 ; 10141205 > v_add_f32_e32 v1, v25, v1 ; 06020319 > s_buffer_load_dword s5, s[0:3], 0xc ; C202810C > v_mac_f32_e32 v10, s4, v1 ; 3E140204 > s_buffer_load_dword s4, s[0:3], 0xa ; C202010A > v_mul_f32_e32 v12, s9, v9 ; 10181209 > v_mul_f32_e32 v13, s13, v9 ; 101A120D > v_mul_f32_e32 v9, s14, v9 ; 1012120E > 
v_mac_f32_e32 v12, s8, v1 ; 3E180208 > v_mac_f32_e32 v13, s12, v1 ; 3E1A020C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v9, s5, v1 ; 3E120205 > v_mac_f32_e32 v10, s6, v0 ; 3E140006 > v_mac_f32_e32 v12, s10, v0 ; 3E18000A > v_mac_f32_e32 v13, s4, v0 ; 3E1A0004 > v_mac_f32_e32 v9, s15, v0 ; 3E12000F > v_mul_f32_e32 v0, v18, v18 ; 10002512 > v_mac_f32_e32 v15, v17, v38 ; 3E1E4D11 > v_mac_f32_e32 v0, v8, v8 ; 3E001108 > v_mul_f32_e32 v17, v5, v5 ; 10220B05 > v_mac_f32_e32 v0, v11, v11 ; 3E00170B > v_mac_f32_e32 v17, v19, v19 ; 3E222713 > v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 > v_mac_f32_e32 v17, v20, v20 ; 3E222914 > v_mul_f32_e32 v21, v15, v15 ; 102A1F0F > s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C > s_buffer_load_dword s13, s[0:3], 0x20 ; C2068120 > s_buffer_load_dword s10, s[0:3], 0x24 ; C2050124 > v_mac_f32_e32 v21, v6, v6 ; 3E2A0D06 > v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 > s_buffer_load_dword s9, s[0:3], 0x1d ; C204811D > s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 > s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 > v_mac_f32_e32 v21, v7, v7 ; 3E2A0F07 > v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 > v_mul_f32_e32 v1, v18, v0 ; 10020112 > v_mul_f32_e32 v8, v8, v0 ; 10100108 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v16, s8, v1 ; 10200208 > v_mul_f32_e32 v18, s13, v1 ; 1024020D > v_mul_f32_e32 v1, s10, v1 ; 1002020A > v_mul_f32_e32 v5, v5, v17 ; 100A2305 > s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E > s_buffer_load_dword s6, s[0:3], 0x22 ; C2030122 > s_buffer_load_dword s15, s[0:3], 0x26 ; C2078126 > v_mac_f32_e32 v16, s9, v8 ; 3E201009 > v_mac_f32_e32 v18, s4, v8 ; 3E241004 > v_mac_f32_e32 v1, s14, v8 ; 3E02100E > v_mul_f32_e32 v8, v19, v17 ; 10102313 > v_mul_f32_e32 v19, s8, v5 ; 10260A08 > v_mul_f32_e32 v22, s13, v5 ; 102C0A0D > v_mul_f32_e32 v5, s10, v5 ; 100A0A0A > v_mac_f32_e32 v19, s9, v8 ; 3E261009 > v_mac_f32_e32 v22, s4, v8 ; 3E2C1004 > v_mac_f32_e32 v5, s14, v8 ; 3E0A100E > v_mul_f32_e32 v8, v15, v21 ; 10102B0F > v_mul_f32_e32 v6, v6, v21 ; 100C2B06 > v_mul_f32_e32 v15, s8, v8 ; 101E1008 > v_mul_f32_e32 v23, s13, v8 ; 102E100D > v_mul_f32_e32 v8, s10, v8 ; 1010100A > s_buffer_load_dword s5, s[0:3], 0xb ; C202810B > s_buffer_load_dword s0, s[0:3], 0xf ; C200010F > v_mac_f32_e32 v15, s9, v6 ; 3E1E0C09 > v_mul_f32_e32 v7, v7, v21 ; 100E2B07 > v_mac_f32_e32 v23, s4, v6 ; 3E2E0C04 > v_mac_f32_e32 v8, s14, v6 ; 3E100C0E > v_mul_f32_e32 v0, v11, v0 ; 1000010B > v_mul_f32_e32 v6, v20, v17 ; 100C2314 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v15, s12, v7 ; 3E1E0E0C > v_mac_f32_e32 v23, s6, v7 ; 3E2E0E06 > v_mac_f32_e32 v8, s15, v7 ; 3E100E0F > v_mov_b32_e32 v7, 1.0 ; 7E0E02F2 > v_mac_f32_e32 v16, s12, v0 ; 3E20000C > v_mac_f32_e32 v18, s6, v0 ; 3E240006 > v_mac_f32_e32 v1, s15, v0 ; 3E02000F > v_add_f32_e32 v0, s7, v10 ; 06001407 > v_mac_f32_e32 v19, s12, v6 ; 3E260C0C > exp 15, 32, 0, 0, 0, v3, v4, v14, v7 ; F800020F 070E0403 > v_mac_f32_e32 v22, s6, v6 ; 3E2C0C06 > exp 15, 33, 0, 0, 0, v16, v19, v15, v0 ; F800021F 000F1310 > v_mac_f32_e32 v5, s15, v6 ; 3E0A0C0F > exp 15, 34, 0, 0, 0, v18, v22, v23, v30 ; F800022F 1E171612 > v_add_f32_e32 v6, s11, v12 ; 060C180B > v_add_f32_e32 v10, s5, v13 ; 06141A05 > v_add_f32_e32 v9, s0, v9 ; 06121200 > exp 15, 35, 0, 0, 0, v1, v5, v8, v0 ; F800023F 00080501 > exp 15, 12, 0, 1, 0, v0, v6, v10, v9 ; F80008CF 090A0600 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 56 >Code Size: 1596 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max 
Waves: 4 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_mov_b32_e32 v18, v13 ; 7E24030D > v_interp_p1_f32 v13, v2, 0, 0, [m0] ; C8340002 > v_interp_p2_f32 v13, [v13], v3, 0, 0, [m0] ; C8350003 > v_interp_p1_f32 v14, v2, 1, 0, [m0] ; C8380102 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v14, [v14], v3, 1, 0, [m0] ; C8390103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > s_load_dwordx8 s[24:31], s[4:5], 0x0 ; C0CC0500 > s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v15, v2, 1, 2, [m0] ; C83C0902 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[0:3], 0x59 ; C2038159 > v_interp_p2_f32 v15, [v15], v3, 1, 2, [m0] ; C83D0903 > v_interp_p1_f32 v16, v2, 2, 2, [m0] ; C8400A02 > v_interp_p2_f32 v16, [v16], v3, 2, 2, [m0] ; C8410A03 > v_interp_p1_f32 v17, v2, 0, 3, [m0] ; C8440C02 > s_buffer_load_dword s6, s[0:3], 0x58 ; C2030158 > v_interp_p2_f32 v17, [v17], v3, 0, 3, [m0] ; C8450C03 > v_interp_p1_f32 v19, v2, 1, 3, [m0] ; C84C0D02 > v_interp_p2_f32 v19, [v19], v3, 1, 3, [m0] ; C84D0D03 > v_interp_p1_f32 v20, v2, 2, 3, [m0] ; C8500E02 > s_load_dwordx4 s[12:15], s[4:5], 0x1c ; C086051C > s_load_dwordx8 s[16:23], s[4:5], 0x10 ; C0C80510 > s_and_b32 s32, s32, s31 ; 87201F20 > v_interp_p2_f32 v20, [v20], v3, 2, 3, [m0] ; C8510E03 > image_sample v1, v[13:14], s[24:31], s[32:35] dmask:0x8 ; F0800800 0106010D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_sub_f32_e32 v2, 1.0, v1 ; 080402F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e64 v0, 1.0, s7 ; D2060000 00000EF2 > v_fma_f32 v0, s6, v0, v2 ; D2960000 040A0006 > v_rcp_f32_e32 v2, s7 ; 7E045407 > s_and_b32 s12, s12, s23 ; 870C170C > image_sample v[8:11], v[13:14], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064080D > s_buffer_load_dword s8, s[0:3], 0x5c ; C204015C > s_buffer_load_dword s9, s[0:3], 0x5d ; C204815D > s_buffer_load_dword s11, s[0:3], 0x5e ; C205815E > s_buffer_load_dword s36, s[0:3], 0x5f ; C212015F > s_buffer_load_dword s37, s[0:3], 0x60 ; C2128160 > s_buffer_load_dword s38, s[0:3], 0x61 ; C2130161 > s_buffer_load_dword s12, s[0:3], 0x62 ; C2060162 > s_buffer_load_dword s2, s[0:3], 0x63 ; C2010163 > v_cmp_neq_f32_e64 s[0:1], 0, s7 ; D01A0000 00000E80 > v_bfrev_b32_e32 v3, 14 ; 7E06708E > v_cndmask_b32_e64 v2, v3, v2, s[0:1] ; D2000002 00020503 > v_mad_f32 v0, v0, v2, -v2 ; D2820000 840A0500 > v_sub_f32_e32 v1, 0x3f7eb852, v1 ; 080202FF 3F7EB852 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_mov_b32_e32 v2, 0x40400000 ; 7E0402FF 40400000 > v_ceil_f32_e32 v1, v1 ; 7E024501 > v_fma_f32 v2, -2.0, v0, v2 ; D2960002 040A00F5 > v_mul_f32_e32 v0, v0, v0 ; 10000100 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_fma_f32 v2, -v1, v0, 1.0 ; D2960002 23CA0101 > v_log_f32_e32 v2, v2 ; 7E044F02 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > s_load_dwordx4 s[20:23], s[4:5], 0x2c ; C08A052C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v1, s37, v2 ; 10020425 > s_load_dwordx4 s[16:19], s[4:5], 0x3c ; C088053C > s_load_dwordx8 s[40:47], s[4:5], 0x30 ; C0D40530 > 
v_exp_f32_e32 v1, v1 ; 7E024B01 > v_mul_f32_e32 v2, s36, v1 ; 10040224 > v_mov_b32_e32 v1, 0xbec0c0c1 ; 7E0202FF BEC0C0C1 > v_cmp_ne_i32_e32 vcc, 0, v12 ; 7D0A1880 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v11, v0, v1 ; D2960000 0406010B > v_cndmask_b32_e64 v3, 0, -1, vcc ; D2000003 01A98280 > v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 > v_cndmask_b32_e64 v11, 0, -1.0, vcc ; D200000B 01A9E680 > v_mul_f32_e32 v0, s8, v2 ; 10000408 > v_mul_f32_e32 v1, s9, v2 ; 10020409 > v_mul_f32_e32 v2, s11, v2 ; 1004040B > s_and_b32 s20, s20, s31 ; 87141F14 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s16, s16, s47 ; 87102F10 > v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 > image_sample v[11:12], v[13:14], s[24:31], s[20:23] dmask:0xa ; F0800A00 00A60B0D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v11, v11, 2.0, -1.0 ; D296000B 03CDE90B > v_fma_f32 v12, v12, 2.0, -1.0 ; D296000C 03CDE90C > v_fma_f32 v21, -v11, v11, 1.0 ; D2960015 23CA170B > v_fma_f32 v21, -v12, v12, v21 ; D2960015 2456190C > v_mul_f32_e32 v4, v11, v4 ; 1008090B > v_mac_f32_e32 v4, v12, v5 ; 3E080B0C > v_mul_f32_e32 v5, v11, v7 ; 100A0F0B > v_sqrt_f32_e32 v21, v21 ; 7E2A6715 > v_mac_f32_e32 v4, v21, v6 ; 3E080D15 > v_mac_f32_e32 v5, v12, v15 ; 3E0A1F0C > v_mul_f32_e32 v6, v11, v17 ; 100C230B > v_mac_f32_e32 v6, v12, v19 ; 3E0C270C > v_mac_f32_e32 v5, v21, v16 ; 3E0A2115 > v_mul_f32_e32 v7, v4, v4 ; 100E0904 > v_mac_f32_e32 v6, v21, v20 ; 3E0C2915 > v_mac_f32_e32 v7, v5, v5 ; 3E0E0B05 > v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 > v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 > v_cmp_ne_i32_e32 vcc, 0, v3 ; 7D0A0680 > image_sample v[14:17], v[13:14], s[40:47], s[16:19] dmask:0xf ; F0800F00 008A0E0D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v13, v15 ; 7E1A030F > v_mul_f32_e32 v4, v4, v7 ; 10080F04 > v_mul_f32_e32 v5, v5, v7 ; 100A0F05 > v_mul_f32_e32 v6, v6, v7 ; 100C0F06 > v_bfrev_b32_e32 v7, 1 ; 7E0E7081 > v_xor_b32_e32 v12, v5, v7 ; 3A180F05 > v_xor_b32_e32 v11, v4, v7 ; 3A160F04 > v_xor_b32_e32 v7, v6, v7 ; 3A0E0F06 > v_cndmask_b32_e32 v3, v11, v4 ; 0006090B > v_cndmask_b32_e32 v5, v12, v5 ; 000A0B0C > v_cndmask_b32_e32 v6, v7, v6 ; 000C0D07 > v_fma_f32 v4, v3, 0.5, 0.5 ; D2960004 03C1E103 > v_mul_f32_e32 v12, s12, v16 ; 1018200C > v_fma_f32 v5, v5, 0.5, 0.5 ; D2960005 03C1E105 > v_fma_f32 v6, v6, 0.5, 0.5 ; D2960006 03C1E106 > v_mov_b32_e32 v3, 0 ; 7E060280 > v_mov_b32_e32 v7, s2 ; 7E0E0202 > v_mov_b32_e32 v11, v17 ; 7E160311 > v_mov_b32_e32 v15, s38 ; 7E1E0226 > v_mov_b32_e32 v16, v18 ; 7E200312 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 > v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A > exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C > v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E > exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x1002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 24 >Code Size: 708 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, 
s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 > v_mov_b32_e32 v10, v4 ; 7E140304 > v_mov_b32_e32 v11, v4 ; 7E160304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_load_dwordx4 s[12:15], s[10:11], 0x14 ; C0860B14 > s_load_dwordx4 s[16:19], s[10:11], 0x18 ; C0880B18 > s_load_dwordx4 s[8:11], s[10:11], 0x1c ; C0840B1C > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[18:21], v7, s[20:23], 0 idxen ; E00C2000 80051207 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[21:24], v9, s[12:15], 0 idxen ; E00C2000 80031509 > buffer_load_format_xyzw v[25:28], v10, s[16:19], 0 idxen ; E00C2000 8004190A > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[8:11], v11, s[8:11], 0 idxen ; E00C2000 8002080B > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C > s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 > s_buffer_load_dword s26, s[0:3], 0x24 ; C20D0124 > s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D > s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 > s_buffer_load_dword s27, s[0:3], 0x25 ; C20D8125 > s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E > s_buffer_load_dword s25, s[0:3], 0x22 ; C20C8122 > s_buffer_load_dword s28, s[0:3], 0x26 ; C20E0126 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s29, s[0:3], 0x28 ; C20E8128 > s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 > s_buffer_load_dword s31, s[0:3], 0x2a ; C20F812A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, v0, v9 ; 10121300 > v_mul_f32_e32 v1, v0, v10 ; 10021500 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_cvt_i32_f32_e32 v8, v9 ; 7E101109 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v8, 5, v8 ; 34101085 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_or_b32_e32 v10, 4, v8 ; 38141084 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v9, 4, v1 ; 38120284 > buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A > v_or_b32_e32 v11, 16, v8 ; 38161090 > v_or_b32_e32 v28, 28, v8 ; 3838109C > v_or_b32_e32 v29, 20, v8 ; 383A1094 > 
v_or_b32_e32 v30, 4, v0 ; 383C0084 > v_or_b32_e32 v31, 24, v8 ; 383E1098 > buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B > buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E > buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F > v_or_b32_e32 v33, 28, v1 ; 3842029C > v_or_b32_e32 v34, 24, v1 ; 38440298 > v_or_b32_e32 v32, 16, v1 ; 38400290 > v_or_b32_e32 v35, 20, v1 ; 38460294 > buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 > buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 > buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 > buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 > v_or_b32_e32 v37, 28, v0 ; 384A009C > v_or_b32_e32 v38, 24, v0 ; 384C0098 > v_or_b32_e32 v36, 16, v0 ; 38480090 > v_or_b32_e32 v39, 20, v0 ; 384E0094 > buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 > buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 > buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 > buffer_load_dword v39, v39, s[4:7], 0 offen ; E0301000 80012727 > v_or_b32_e32 v41, 8, v8 ; 38521088 > buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 > v_or_b32_e32 v40, 8, v1 ; 38500288 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > v_or_b32_e32 v42, 8, v0 ; 38540088 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > buffer_load_dword v40, v40, s[4:7], 0 offen ; E0301000 80012828 > buffer_load_dword v41, v41, s[4:7], 0 offen ; E0301000 80012929 > buffer_load_dword v42, v42, s[4:7], 0 offen ; E0301000 80012A2A > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s0, s[0:3], 0x2b ; C200012B > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v10, v10, v26 ; 1014350A > v_mul_f32_e32 v48, v11, v11 ; 1060170B > v_mac_f32_e32 v10, v9, v25 ; 3E143309 > v_mul_f32_e32 v9, v28, v11 ; 1012171C > v_mac_f32_e32 v10, v30, v27 ; 3E14371E > s_waitcnt vmcnt(14) ; BF8C0F7E > v_fma_f32 v30, v29, v31, -v9 ; D296001E 84263F1D > v_mul_f32_e32 v43, v26, v30 ; 10563D1A > v_mac_f32_e32 v43, v26, v30 ; 3E563D1A > v_mul_f32_e32 v30, v28, v31 ; 103C3F1C > v_fma_f32 v44, v11, v29, v30 ; D296002C 047A3B0B > v_mul_f32_e32 v45, v26, v44 ; 105A591A > v_mac_f32_e32 v45, v26, v44 ; 3E5A591A > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mul_f32_e32 v44, v33, v34 ; 10584521 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_fma_f32 v46, v32, v35, v44 ; D296002E 04B24720 > v_mul_f32_e32 v46, v25, v46 ; 105C5D19 > v_mad_f32 v49, v31, v31, v48 ; D2820031 04C23F1F > v_mac_f32_e32 v45, 2.0, v46 ; 3E5A5CF4 > v_mul_f32_e32 v46, v32, v32 ; 105C4120 > v_mad_f32 v47, v34, v34, v46 ; D282002F 04BA4522 > v_fma_f32 v49, -v49, 2.0, 1.0 ; D2960031 23C9E931 > v_fma_f32 v47, -v47, 2.0, 1.0 ; D296002F 23C9E92F > v_mul_f32_e32 v49, v49, v26 ; 10623531 > v_mac_f32_e32 v49, v47, v25 ; 3E62332F > v_mul_f32_e32 v47, v33, v32 ; 105E4121 > v_fma_f32 v50, v35, v34, -v47 ; D2960032 84BE4523 > v_mul_f32_e32 v50, v25, v50 ; 10646519 > v_mac_f32_e32 v43, 2.0, v50 ; 3E5664F4 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v50, v37, v38 ; 10644D25 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_fma_f32 v51, v36, v39, v50 ; D2960033 04CA4F24 > v_mul_f32_e32 v51, v27, v51 ; 1066671B > 
s_waitcnt vmcnt(5) ; BF8C0F75 > v_mul_f32_e32 v8, v8, v26 ; 10103508 > v_mac_f32_e32 v45, 2.0, v51 ; 3E5A66F4 > v_mul_f32_e32 v51, v36, v36 ; 10664924 > v_mad_f32 v52, v38, v38, v51 ; D2820034 04CE4D26 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v8, v1, v25 ; 3E103301 > v_mul_f32_e32 v1, v33, v35 ; 10024721 > v_fma_f32 v52, -v52, 2.0, 1.0 ; D2960034 23C9E934 > v_fma_f32 v33, v32, v35, -v44 ; D2960021 84B24720 > v_fma_f32 v44, v32, v34, -v1 ; D296002C 84064520 > v_mac_f32_e32 v1, v32, v34 ; 3E024520 > v_mul_f32_e32 v32, v37, v39 ; 10404F25 > v_mul_f32_e32 v28, v28, v29 ; 10383B1C > v_fma_f32 v30, v11, v29, -v30 ; D296001E 847A3B0B > v_mac_f32_e32 v49, v52, v27 ; 3E623734 > v_mul_f32_e32 v52, v37, v36 ; 10684925 > v_fma_f32 v37, v36, v39, -v50 ; D2960025 84CA4F24 > v_fma_f32 v50, v36, v38, -v32 ; D2960032 84824D24 > v_mac_f32_e32 v32, v36, v38 ; 3E404D24 > v_fma_f32 v36, v11, v31, -v28 ; D2960024 84723F0B > v_mac_f32_e32 v28, v11, v31 ; 3E383F0B > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v8, v0, v27 ; 3E103700 > v_mul_f32_e32 v0, v26, v30 ; 10003D1A > v_mul_f32_e32 v11, v26, v28 ; 1016391A > v_mac_f32_e32 v0, v26, v30 ; 3E003D1A > v_mul_f32_e32 v30, v29, v29 ; 103C3B1D > v_mac_f32_e32 v30, v31, v31 ; 3E3C3F1F > v_mac_f32_e32 v11, v26, v28 ; 3E16391A > v_mul_f32_e32 v28, v35, v35 ; 10384723 > v_mul_f32_e32 v1, v25, v1 ; 10020319 > v_mac_f32_e32 v28, v34, v34 ; 3E384522 > v_fma_f32 v30, -v30, 2.0, 1.0 ; D296001E 23C9E91E > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v39, v39 ; 10024F27 > v_mac_f32_e32 v1, v38, v38 ; 3E024D26 > v_fma_f32 v28, -v28, 2.0, 1.0 ; D296001C 23C9E91C > v_mul_f32_e32 v30, v30, v26 ; 103C351E > v_mac_f32_e32 v30, v28, v25 ; 3E3C331C > v_mul_f32_e32 v28, v25, v33 ; 10384319 > v_fma_f32 v1, -v1, 2.0, 1.0 ; D2960001 23C9E901 > v_mac_f32_e32 v0, 2.0, v28 ; 3E0038F4 > v_mac_f32_e32 v30, v1, v27 ; 3E3C3701 > v_mul_f32_e32 v1, v27, v37 ; 10024B1B > v_mac_f32_e32 v0, 2.0, v1 ; 3E0002F4 > v_mul_f32_e32 v1, v27, v32 ; 1002411B > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v13, v0 ; 1002010D > v_mac_f32_e32 v1, v12, v30 ; 3E023D0C > v_mac_f32_e32 v1, v14, v11 ; 3E02170E > v_add_f32_e32 v1, v8, v1 ; 06020308 > v_fma_f32 v8, v35, v34, v47 ; D2960008 04BE4523 > v_fma_f32 v9, v29, v31, v9 ; D2960009 04263F1D > v_mul_f32_e32 v31, v25, v8 ; 103E1119 > v_mac_f32_e32 v46, v35, v35 ; 3E5C4723 > v_mac_f32_e32 v48, v29, v29 ; 3E603B1D > v_mul_f32_e32 v29, v25, v44 ; 103A5919 > v_mac_f32_e32 v31, v25, v8 ; 3E3E1119 > v_fma_f32 v8, -v46, 2.0, 1.0 ; D2960008 23C9E92E > v_fma_f32 v28, v39, v38, v52 ; D296001C 04D24D27 > v_mul_f32_e32 v9, v26, v9 ; 1012131A > v_mac_f32_e32 v51, v39, v39 ; 3E664F27 > v_mac_f32_e32 v29, v25, v44 ; 3E3A5919 > v_mul_f32_e32 v8, v8, v25 ; 10103308 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v25, v40, v25 ; 10323328 > v_fma_f32 v32, -v48, 2.0, 1.0 ; D2960020 23C9E930 > v_fma_f32 v53, v39, v38, -v52 ; D2960035 84D24D27 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v25, v41, v26 ; 3E323529 > v_mac_f32_e32 v8, v32, v26 ; 3E103520 > v_fma_f32 v32, -v51, 2.0, 1.0 ; D2960020 23C9E933 > v_mul_f32_e32 v26, v26, v36 ; 1034491A > v_mul_f32_e32 v28, v27, v28 ; 1038391B > v_mac_f32_e32 v31, 2.0, v9 ; 3E3E12F4 > v_mul_f32_e32 v53, v27, v53 ; 106A6B1B > v_mac_f32_e32 v31, 2.0, v28 ; 3E3E38F4 > v_mac_f32_e32 v8, v32, v27 ; 3E103720 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v25, v42, v27 ; 3E32372A > v_mul_f32_e32 v27, v27, v50 ; 1036651B > v_mac_f32_e32 v29, 2.0, v26 ; 3E3A34F4 > v_mac_f32_e32 v29, 2.0, v27 ; 3E3A36F4 > v_mul_f32_e32 
v9, v13, v31 ; 10123F0D > v_mac_f32_e32 v9, v12, v29 ; 3E123B0C > v_mac_f32_e32 v9, v14, v8 ; 3E12110E > v_mac_f32_e32 v43, 2.0, v53 ; 3E566AF4 > v_mul_f32_e32 v53, v13, v49 ; 106A630D > v_mac_f32_e32 v53, v12, v45 ; 3E6A5B0C > v_add_f32_e32 v9, v25, v9 ; 06121319 > v_mov_b32_e32 v25, 1.0 ; 7E3202F2 > v_mac_f32_e32 v53, v14, v43 ; 3E6A570E > exp 15, 32, 0, 0, 0, v3, v4, v14, v25 ; F800020F 190E0403 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v3, v18, v45 ; 10065B12 > v_mul_f32_e32 v4, v18, v29 ; 10083B12 > v_mul_f32_e32 v14, v18, v30 ; 101C3D12 > v_mac_f32_e32 v3, v19, v49 ; 3E066313 > v_mac_f32_e32 v4, v19, v31 ; 3E083F13 > v_mac_f32_e32 v14, v19, v0 ; 3E1C0113 > v_mul_f32_e32 v18, v5, v45 ; 10245B05 > v_mul_f32_e32 v19, v5, v29 ; 10263B05 > v_mul_f32_e32 v5, v5, v30 ; 100A3D05 > v_mac_f32_e32 v18, v6, v49 ; 3E246306 > v_mac_f32_e32 v19, v6, v31 ; 3E263F06 > v_mac_f32_e32 v5, v6, v0 ; 3E0A0106 > v_mac_f32_e32 v18, v7, v43 ; 3E245707 > v_mac_f32_e32 v19, v7, v8 ; 3E261107 > v_mac_f32_e32 v5, v7, v11 ; 3E0A1707 > v_mul_f32_e32 v7, v15, v29 ; 100E3B0F > v_mac_f32_e32 v7, v16, v31 ; 3E0E3F10 > v_mac_f32_e32 v14, v20, v11 ; 3E1C1714 > v_add_f32_e32 v53, v10, v53 ; 066A6B0A > v_mul_f32_e32 v6, v15, v45 ; 100C5B0F > v_mac_f32_e32 v3, v20, v43 ; 3E065714 > v_mac_f32_e32 v4, v20, v8 ; 3E081114 > v_mac_f32_e32 v7, v17, v8 ; 3E0E1111 > v_mul_f32_e32 v8, v15, v30 ; 10103D0F > v_mul_f32_e32 v15, v14, v14 ; 101E1D0E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v13, s9, v53 ; 101A6A09 > v_mac_f32_e32 v15, v3, v3 ; 3E1E0703 > v_mac_f32_e32 v13, s8, v1 ; 3E1A0208 > v_mac_f32_e32 v15, v4, v4 ; 3E1E0904 > v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F > v_mac_f32_e32 v13, s10, v9 ; 3E1A120A > v_mac_f32_e32 v8, v16, v0 ; 3E100110 > v_add_f32_e32 v0, s11, v13 ; 06001A0B > v_mul_f32_e32 v13, s17, v53 ; 101A6A11 > v_mul_f32_e32 v54, s5, v53 ; 106C6A05 > v_mac_f32_e32 v13, s16, v1 ; 3E1A0210 > v_mac_f32_e32 v8, v17, v11 ; 3E101711 > v_mul_f32_e32 v11, s13, v53 ; 10166A0D > v_mac_f32_e32 v54, s4, v1 ; 3E6C0204 > v_mac_f32_e32 v6, v16, v49 ; 3E0C6310 > v_mac_f32_e32 v11, s12, v1 ; 3E16020C > v_mac_f32_e32 v13, s18, v9 ; 3E1A1212 > v_mac_f32_e32 v11, s14, v9 ; 3E16120E > v_mac_f32_e32 v54, s6, v9 ; 3E6C1206 > v_add_f32_e32 v1, s19, v13 ; 06021A13 > v_mul_f32_e32 v13, v5, v5 ; 101A0B05 > v_mul_f32_e32 v9, v14, v15 ; 10121F0E > v_mac_f32_e32 v6, v17, v43 ; 3E0C5711 > v_mul_f32_e32 v14, v8, v8 ; 101C1108 > v_mac_f32_e32 v13, v18, v18 ; 3E1A2512 > v_mac_f32_e32 v14, v6, v6 ; 3E1C0D06 > v_mac_f32_e32 v13, v19, v19 ; 3E1A2713 > v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D > v_mul_f32_e32 v3, v3, v15 ; 10061F03 > v_mul_f32_e32 v4, v4, v15 ; 10081F04 > v_mul_f32_e32 v8, v8, v14 ; 10101D08 > v_mul_f32_e32 v5, v5, v13 ; 100A1B05 > v_mul_f32_e32 v15, s20, v9 ; 101E1214 > v_mul_f32_e32 v18, v18, v13 ; 10241B12 > v_mul_f32_e32 v16, s20, v5 ; 10200A14 > v_mul_f32_e32 v17, s20, v8 ; 10221014 > v_mul_f32_e32 v6, v6, v14 ; 100C1D06 > v_mul_f32_e32 v7, v7, v14 ; 100E1D07 > v_mul_f32_e32 v14, s23, v9 ; 101C1217 > v_mul_f32_e32 v9, s26, v9 ; 1012121A > v_mac_f32_e32 v15, s21, v3 ; 3E1E0615 > v_mac_f32_e32 v17, s21, v6 ; 3E220C15 > v_mac_f32_e32 v14, s24, v3 ; 3E1C0618 > v_mac_f32_e32 v9, s27, v3 ; 3E12061B > v_mul_f32_e32 v3, s23, v5 ; 10060A17 > v_mul_f32_e32 v13, v19, v13 ; 101A1B13 > v_mul_f32_e32 v19, s23, v8 ; 10261017 > v_mac_f32_e32 v16, s21, v18 ; 3E202415 > v_mac_f32_e32 v3, s24, v18 ; 3E062418 > v_mac_f32_e32 v19, s24, v6 ; 3E260C18 > 
v_mul_f32_e32 v5, s26, v5 ; 100A0A1A > v_mul_f32_e32 v8, s26, v8 ; 1010101A > v_mac_f32_e32 v15, s22, v4 ; 3E1E0816 > v_mac_f32_e32 v16, s22, v13 ; 3E201A16 > v_mac_f32_e32 v17, s22, v7 ; 3E220E16 > v_mac_f32_e32 v8, s27, v6 ; 3E100C1B > v_mac_f32_e32 v5, s27, v18 ; 3E0A241B > v_mac_f32_e32 v14, s25, v4 ; 3E1C0819 > v_mac_f32_e32 v3, s25, v13 ; 3E061A19 > v_mac_f32_e32 v19, s25, v7 ; 3E260E19 > exp 15, 33, 0, 0, 0, v15, v16, v17, v0 ; F800021F 0011100F > v_mul_f32_e32 v6, v22, v24 ; 100C3116 > v_mac_f32_e32 v9, s28, v4 ; 3E12081C > v_mul_f32_e32 v4, v21, v24 ; 10083115 > v_mac_f32_e32 v5, s28, v13 ; 3E0A1A1C > v_mac_f32_e32 v8, s28, v7 ; 3E100E1C > v_mul_f32_e32 v7, v23, v24 ; 100E3117 > exp 15, 34, 0, 0, 0, v14, v3, v19, v10 ; F800022F 0A13030E > v_mul_f32_e32 v4, s29, v4 ; 1008081D > v_mul_f32_e32 v6, s30, v6 ; 100C0C1E > v_mul_f32_e32 v7, s31, v7 ; 100E0E1F > v_mul_f32_e32 v13, s0, v24 ; 101A3000 > exp 15, 35, 0, 0, 0, v9, v5, v8, v0 ; F800023F 00080509 > v_add_f32_e32 v12, s7, v54 ; 06186C07 > v_add_f32_e32 v11, s15, v11 ; 0616160F > exp 15, 36, 0, 0, 0, v4, v6, v7, v13 ; F800024F 0D070604 > exp 15, 12, 0, 1, 0, v12, v0, v11, v1 ; F80008CF 010B000C > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 56 >Code Size: 1652 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_mov_b32_e32 v18, v13 ; 7E24030D > v_interp_p1_f32 v13, v2, 0, 0, [m0] ; C8340002 > v_interp_p2_f32 v13, [v13], v3, 0, 0, [m0] ; C8350003 > v_interp_p1_f32 v14, v2, 1, 0, [m0] ; C8380102 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v14, [v14], v3, 1, 0, [m0] ; C8390103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v15, v2, 1, 2, [m0] ; C83C0902 > v_interp_p2_f32 v15, [v15], v3, 1, 2, [m0] ; C83D0903 > v_interp_p1_f32 v16, v2, 2, 2, [m0] ; C8400A02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[0:3], 0x59 ; C2038159 > v_interp_p2_f32 v16, [v16], v3, 2, 2, [m0] ; C8410A03 > v_interp_p1_f32 v17, v2, 0, 3, [m0] ; C8440C02 > v_interp_p2_f32 v17, [v17], v3, 0, 3, [m0] ; C8450C03 > v_interp_p1_f32 v19, v2, 1, 3, [m0] ; C84C0D02 > s_load_dwordx8 s[20:27], s[4:5], 0x0 ; C0CA0500 > s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C > v_interp_p2_f32 v19, [v19], v3, 1, 3, [m0] ; C84D0D03 > v_interp_p1_f32 v20, v2, 2, 3, [m0] ; C8500E02 > s_buffer_load_dword s6, s[0:3], 0x58 ; C2030158 > v_interp_p2_f32 v20, [v20], v3, 2, 3, [m0] ; C8510E03 > v_interp_p1_f32 v0, v2, 3, 4, [m0] ; C8001302 > v_interp_p2_f32 v0, [v0], v3, 3, 4, [m0] ; C8011303 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v3, s7 ; 7E065407 > v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 > v_add_f32_e64 v2, 1.0, s7 ; D2060002 00000EF2 > s_and_b32 s28, s28, s27 ; 871C1B1C > image_sample v[8:11], v[13:14], s[20:27], s[28:31] dmask:0xf ; F0800F00 00E5080D > v_fma_f32 v1, s6, v2, v1 ; D2960001 04060406 > s_buffer_load_dword s8, s[0:3], 0x5c ; C204015C > s_buffer_load_dword s9, s[0:3], 0x5d ; C204815D > s_buffer_load_dword s11, s[0:3], 0x5e ; C205815E > 
s_buffer_load_dword s20, s[0:3], 0x5f ; C20A015F > s_buffer_load_dword s21, s[0:3], 0x60 ; C20A8160 > s_buffer_load_dword s22, s[0:3], 0x61 ; C20B0161 > s_buffer_load_dword s23, s[0:3], 0x62 ; C20B8162 > s_buffer_load_dword s2, s[0:3], 0x63 ; C2010163 > v_cmp_neq_f32_e64 s[0:1], 0, s7 ; D01A0000 00000E80 > v_bfrev_b32_e32 v2, 14 ; 7E04708E > v_cndmask_b32_e64 v2, v2, v3, s[0:1] ; D2000002 00020702 > v_mad_f32 v1, v1, v2, -v2 ; D2820001 840A0501 > v_sub_f32_e32 v0, 0x3f7eb852, v0 ; 080000FF 3F7EB852 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mov_b32_e32 v2, 0x40400000 ; 7E0402FF 40400000 > v_ceil_f32_e32 v0, v0 ; 7E004500 > v_fma_f32 v2, -2.0, v1, v2 ; D2960002 040A02F5 > v_mul_f32_e32 v1, v1, v1 ; 10020301 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_fma_f32 v2, -v0, v1, 1.0 ; D2960002 23CA0300 > v_log_f32_e32 v2, v2 ; 7E044F02 > v_mul_f32_e32 v0, v0, v1 ; 10000300 > s_load_dwordx8 s[12:19], s[4:5], 0x10 ; C0C60510 > s_load_dwordx4 s[28:31], s[4:5], 0x1c ; C08E051C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v1, s21, v2 ; 10020415 > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > s_load_dwordx8 s[32:39], s[4:5], 0x20 ; C0D00520 > v_exp_f32_e32 v1, v1 ; 7E024B01 > v_mul_f32_e32 v2, s20, v1 ; 10040214 > v_mov_b32_e32 v1, 0xbec0c0c1 ; 7E0202FF BEC0C0C1 > v_cmp_ne_i32_e32 vcc, 0, v12 ; 7D0A1880 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v11, v0, v1 ; D2960000 0406010B > v_cndmask_b32_e64 v3, 0, -1, vcc ; D2000003 01A98280 > v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 > v_cndmask_b32_e64 v11, 0, -1.0, vcc ; D200000B 01A9E680 > v_mul_f32_e32 v0, s8, v2 ; 10000408 > v_mul_f32_e32 v1, s9, v2 ; 10020409 > v_mul_f32_e32 v2, s11, v2 ; 1004040B > s_and_b32 s28, s28, s19 ; 871C131C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s39 ; 87182718 > v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 > image_sample v[11:12], v[13:14], s[12:19], s[28:31] dmask:0xa ; F0800A00 00E30B0D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v11, v11, 2.0, -1.0 ; D296000B 03CDE90B > v_fma_f32 v12, v12, 2.0, -1.0 ; D296000C 03CDE90C > v_fma_f32 v21, -v11, v11, 1.0 ; D2960015 23CA170B > v_fma_f32 v21, -v12, v12, v21 ; D2960015 2456190C > v_mul_f32_e32 v4, v11, v4 ; 1008090B > v_mac_f32_e32 v4, v12, v5 ; 3E080B0C > v_mul_f32_e32 v5, v11, v7 ; 100A0F0B > v_sqrt_f32_e32 v21, v21 ; 7E2A6715 > v_mac_f32_e32 v4, v21, v6 ; 3E080D15 > v_mac_f32_e32 v5, v12, v15 ; 3E0A1F0C > v_mul_f32_e32 v6, v11, v17 ; 100C230B > v_mac_f32_e32 v6, v12, v19 ; 3E0C270C > v_mac_f32_e32 v5, v21, v16 ; 3E0A2115 > v_mul_f32_e32 v7, v4, v4 ; 100E0904 > v_mac_f32_e32 v6, v21, v20 ; 3E0C2915 > v_mac_f32_e32 v7, v5, v5 ; 3E0E0B05 > v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 > v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 > v_cmp_ne_i32_e32 vcc, 0, v3 ; 7D0A0680 > image_sample v[14:17], v[13:14], s[32:39], s[24:27] dmask:0xf ; F0800F00 00C80E0D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v13, v15 ; 7E1A030F > v_mul_f32_e32 v4, v4, v7 ; 10080F04 > v_mul_f32_e32 v5, v5, v7 ; 100A0F05 > v_mul_f32_e32 v6, v6, v7 ; 100C0F06 > v_bfrev_b32_e32 v7, 1 ; 7E0E7081 > v_xor_b32_e32 v12, v5, v7 ; 3A180F05 > v_xor_b32_e32 v11, v4, v7 ; 3A160F04 > v_xor_b32_e32 v7, v6, v7 ; 3A0E0F06 > v_cndmask_b32_e32 v3, v11, v4 ; 0006090B > v_cndmask_b32_e32 v5, v12, v5 ; 000A0B0C > v_cndmask_b32_e32 v6, v7, v6 ; 000C0D07 > v_fma_f32 v4, v3, 0.5, 0.5 ; D2960004 03C1E103 > v_mul_f32_e32 v12, s23, v16 ; 10182017 > v_fma_f32 v5, v5, 0.5, 0.5 ; D2960005 03C1E105 > v_fma_f32 v6, v6, 0.5, 0.5 ; D2960006 03C1E106 > v_mov_b32_e32 v3, 0 ; 
7E060280 > v_mov_b32_e32 v7, s2 ; 7E0E0202 > v_mov_b32_e32 v11, v17 ; 7E160311 > v_mov_b32_e32 v15, s22 ; 7E1E0216 > v_mov_b32_e32 v16, v18 ; 7E200312 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 > v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A > exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C > v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E > exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x1002 >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 24 >Code Size: 692 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 > v_mov_b32_e32 v10, v4 ; 7E140304 > v_mov_b32_e32 v11, v4 ; 7E160304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_load_dwordx4 s[12:15], s[10:11], 0x14 ; C0860B14 > s_load_dwordx4 s[16:19], s[10:11], 0x18 ; C0880B18 > s_load_dwordx4 s[8:11], s[10:11], 0x1c ; C0840B1C > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[18:21], v7, s[20:23], 0 idxen ; E00C2000 80051207 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[21:24], v9, s[12:15], 0 idxen ; E00C2000 80031509 > buffer_load_format_xyzw v[25:28], v10, s[16:19], 0 idxen ; E00C2000 8004190A > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[8:11], v11, s[8:11], 0 idxen ; E00C2000 8002080B > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 > s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 > s_buffer_load_dword s10, s[0:3], 0xa ; C205010A > s_buffer_load_dword s11, s[0:3], 0xb ; C205810B > s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 > s_buffer_load_dword s13, s[0:3], 0xd ; C206810D > s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 > s_buffer_load_dword s12, s[0:3], 0xc ; C206010C > s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 > s_buffer_load_dword s14, s[0:3], 0xe ; C207010E > s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 > s_buffer_load_dword s20, s[0:3], 0x20 ; C20A0120 > s_buffer_load_dword s23, 
s[0:3], 0x24 ; C20B8124 > s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128 > s_buffer_load_dword s21, s[0:3], 0x21 ; C20A8121 > s_buffer_load_dword s24, s[0:3], 0x25 ; C20C0125 > s_buffer_load_dword s27, s[0:3], 0x29 ; C20D8129 > s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122 > s_buffer_load_dword s25, s[0:3], 0x26 ; C20C8126 > s_buffer_load_dword s28, s[0:3], 0x2a ; C20E012A > s_buffer_load_dword s15, s[0:3], 0xf ; C207810F > s_buffer_load_dword s29, s[0:3], 0x2c ; C20E812C > s_buffer_load_dword s30, s[0:3], 0x2d ; C20F012D > s_buffer_load_dword s31, s[0:3], 0x2e ; C20F812E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, v0, v9 ; 10121300 > v_mul_f32_e32 v1, v0, v10 ; 10021500 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_cvt_i32_f32_e32 v8, v9 ; 7E101109 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v8, 5, v8 ; 34101085 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_or_b32_e32 v10, 4, v8 ; 38141084 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v9, 4, v1 ; 38120284 > buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A > v_or_b32_e32 v11, 16, v8 ; 38161090 > v_or_b32_e32 v28, 28, v8 ; 3838109C > v_or_b32_e32 v29, 20, v8 ; 383A1094 > v_or_b32_e32 v30, 4, v0 ; 383C0084 > v_or_b32_e32 v31, 24, v8 ; 383E1098 > buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B > buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E > buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F > v_or_b32_e32 v33, 28, v1 ; 3842029C > v_or_b32_e32 v34, 24, v1 ; 38440298 > v_or_b32_e32 v32, 16, v1 ; 38400290 > v_or_b32_e32 v35, 20, v1 ; 38460294 > buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 > buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 > buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 > buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 > v_or_b32_e32 v37, 28, v0 ; 384A009C > v_or_b32_e32 v38, 24, v0 ; 384C0098 > v_or_b32_e32 v36, 16, v0 ; 38480090 > v_or_b32_e32 v39, 20, v0 ; 384E0094 > buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 > buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 > buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 > buffer_load_dword v39, v39, s[4:7], 0 offen ; E0301000 80012727 > v_or_b32_e32 v41, 8, v8 ; 38521088 > buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 > v_or_b32_e32 v40, 8, v1 ; 38500288 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > v_or_b32_e32 v42, 8, v0 ; 38540088 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > buffer_load_dword v40, v40, s[4:7], 0 offen ; E0301000 80012828 > buffer_load_dword v41, v41, s[4:7], 0 offen ; E0301000 80012929 > buffer_load_dword v42, v42, s[4:7], 0 offen ; E0301000 80012A2A > s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 > s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 > s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 > s_buffer_load_dword s7, s[0:3], 0x7 ; C2038107 > s_buffer_load_dword s0, s[0:3], 0x2f ; C200012F > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v10, v10, v26 ; 1014350A > v_mul_f32_e32 v48, v11, v11 ; 1060170B > v_mac_f32_e32 v10, v9, v25 ; 3E143309 > v_mul_f32_e32 v9, v28, v11 ; 1012171C > v_mac_f32_e32 v10, v30, v27 ; 3E14371E > s_waitcnt vmcnt(14) ; BF8C0F7E > v_fma_f32 v30, v29, v31, 
-v9 ; D296001E 84263F1D > v_mul_f32_e32 v43, v26, v30 ; 10563D1A > v_mac_f32_e32 v43, v26, v30 ; 3E563D1A > v_mul_f32_e32 v30, v28, v31 ; 103C3F1C > v_fma_f32 v44, v11, v29, v30 ; D296002C 047A3B0B > v_mul_f32_e32 v45, v26, v44 ; 105A591A > v_mac_f32_e32 v45, v26, v44 ; 3E5A591A > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mul_f32_e32 v44, v33, v34 ; 10584521 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_fma_f32 v46, v32, v35, v44 ; D296002E 04B24720 > v_mul_f32_e32 v46, v25, v46 ; 105C5D19 > v_mad_f32 v49, v31, v31, v48 ; D2820031 04C23F1F > v_mac_f32_e32 v45, 2.0, v46 ; 3E5A5CF4 > v_mul_f32_e32 v46, v32, v32 ; 105C4120 > v_mad_f32 v47, v34, v34, v46 ; D282002F 04BA4522 > v_fma_f32 v49, -v49, 2.0, 1.0 ; D2960031 23C9E931 > v_fma_f32 v47, -v47, 2.0, 1.0 ; D296002F 23C9E92F > v_mul_f32_e32 v49, v49, v26 ; 10623531 > v_mac_f32_e32 v49, v47, v25 ; 3E62332F > v_mul_f32_e32 v47, v33, v32 ; 105E4121 > v_fma_f32 v50, v35, v34, -v47 ; D2960032 84BE4523 > v_mul_f32_e32 v50, v25, v50 ; 10646519 > v_mac_f32_e32 v43, 2.0, v50 ; 3E5664F4 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v50, v37, v38 ; 10644D25 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_fma_f32 v51, v36, v39, v50 ; D2960033 04CA4F24 > v_mul_f32_e32 v51, v27, v51 ; 1066671B > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mul_f32_e32 v8, v8, v26 ; 10103508 > v_mac_f32_e32 v45, 2.0, v51 ; 3E5A66F4 > v_mul_f32_e32 v51, v36, v36 ; 10664924 > v_mad_f32 v52, v38, v38, v51 ; D2820034 04CE4D26 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v8, v1, v25 ; 3E103301 > v_mul_f32_e32 v1, v33, v35 ; 10024721 > v_fma_f32 v52, -v52, 2.0, 1.0 ; D2960034 23C9E934 > v_fma_f32 v33, v32, v35, -v44 ; D2960021 84B24720 > v_fma_f32 v44, v32, v34, -v1 ; D296002C 84064520 > v_mac_f32_e32 v1, v32, v34 ; 3E024520 > v_mul_f32_e32 v32, v37, v39 ; 10404F25 > v_mul_f32_e32 v28, v28, v29 ; 10383B1C > v_fma_f32 v30, v11, v29, -v30 ; D296001E 847A3B0B > v_mac_f32_e32 v49, v52, v27 ; 3E623734 > v_mul_f32_e32 v52, v37, v36 ; 10684925 > v_fma_f32 v37, v36, v39, -v50 ; D2960025 84CA4F24 > v_fma_f32 v50, v36, v38, -v32 ; D2960032 84824D24 > v_mac_f32_e32 v32, v36, v38 ; 3E404D24 > v_fma_f32 v36, v11, v31, -v28 ; D2960024 84723F0B > v_mac_f32_e32 v28, v11, v31 ; 3E383F0B > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v8, v0, v27 ; 3E103700 > v_mul_f32_e32 v0, v26, v30 ; 10003D1A > v_mul_f32_e32 v11, v26, v28 ; 1016391A > v_mac_f32_e32 v0, v26, v30 ; 3E003D1A > v_mul_f32_e32 v30, v29, v29 ; 103C3B1D > v_mac_f32_e32 v30, v31, v31 ; 3E3C3F1F > v_mac_f32_e32 v11, v26, v28 ; 3E16391A > v_mul_f32_e32 v28, v35, v35 ; 10384723 > v_mul_f32_e32 v1, v25, v1 ; 10020319 > v_mac_f32_e32 v28, v34, v34 ; 3E384522 > v_fma_f32 v30, -v30, 2.0, 1.0 ; D296001E 23C9E91E > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v39, v39 ; 10024F27 > v_mac_f32_e32 v1, v38, v38 ; 3E024D26 > v_fma_f32 v28, -v28, 2.0, 1.0 ; D296001C 23C9E91C > v_mul_f32_e32 v30, v30, v26 ; 103C351E > v_mac_f32_e32 v30, v28, v25 ; 3E3C331C > v_mul_f32_e32 v28, v25, v33 ; 10384319 > v_fma_f32 v1, -v1, 2.0, 1.0 ; D2960001 23C9E901 > v_mac_f32_e32 v0, 2.0, v28 ; 3E0038F4 > v_mac_f32_e32 v30, v1, v27 ; 3E3C3701 > v_mul_f32_e32 v1, v27, v37 ; 10024B1B > v_mac_f32_e32 v0, 2.0, v1 ; 3E0002F4 > v_mul_f32_e32 v1, v27, v32 ; 1002411B > v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 > v_mul_f32_e32 v1, v13, v0 ; 1002010D > v_mac_f32_e32 v1, v12, v30 ; 3E023D0C > v_mac_f32_e32 v1, v14, v11 ; 3E02170E > v_add_f32_e32 v1, v8, v1 ; 06020308 > v_fma_f32 v8, v35, v34, v47 ; D2960008 04BE4523 > v_fma_f32 v9, v29, v31, v9 ; D2960009 04263F1D > v_mul_f32_e32 v31, v25, v8 ; 103E1119 > 
v_mac_f32_e32 v46, v35, v35 ; 3E5C4723 > v_mac_f32_e32 v48, v29, v29 ; 3E603B1D > v_mul_f32_e32 v29, v25, v44 ; 103A5919 > v_mac_f32_e32 v31, v25, v8 ; 3E3E1119 > v_fma_f32 v8, -v46, 2.0, 1.0 ; D2960008 23C9E92E > v_fma_f32 v28, v39, v38, v52 ; D296001C 04D24D27 > v_mul_f32_e32 v9, v26, v9 ; 1012131A > v_mac_f32_e32 v51, v39, v39 ; 3E664F27 > v_mac_f32_e32 v29, v25, v44 ; 3E3A5919 > v_mul_f32_e32 v8, v8, v25 ; 10103308 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v25, v40, v25 ; 10323328 > v_fma_f32 v32, -v48, 2.0, 1.0 ; D2960020 23C9E930 > v_fma_f32 v53, v39, v38, -v52 ; D2960035 84D24D27 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v25, v41, v26 ; 3E323529 > v_mac_f32_e32 v8, v32, v26 ; 3E103520 > v_fma_f32 v32, -v51, 2.0, 1.0 ; D2960020 23C9E933 > v_mul_f32_e32 v26, v26, v36 ; 1034491A > v_mul_f32_e32 v28, v27, v28 ; 1038391B > v_mac_f32_e32 v31, 2.0, v9 ; 3E3E12F4 > v_mul_f32_e32 v53, v27, v53 ; 106A6B1B > v_mac_f32_e32 v31, 2.0, v28 ; 3E3E38F4 > v_mac_f32_e32 v8, v32, v27 ; 3E103720 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v25, v42, v27 ; 3E32372A > v_mul_f32_e32 v27, v27, v50 ; 1036651B > v_mac_f32_e32 v29, 2.0, v26 ; 3E3A34F4 > v_mac_f32_e32 v29, 2.0, v27 ; 3E3A36F4 > v_mul_f32_e32 v9, v13, v31 ; 10123F0D > v_mac_f32_e32 v9, v12, v29 ; 3E123B0C > v_mac_f32_e32 v9, v14, v8 ; 3E12110E > v_mac_f32_e32 v43, 2.0, v53 ; 3E566AF4 > v_mul_f32_e32 v53, v13, v49 ; 106A630D > v_mac_f32_e32 v53, v12, v45 ; 3E6A5B0C > v_add_f32_e32 v9, v25, v9 ; 06121319 > v_mov_b32_e32 v25, 1.0 ; 7E3202F2 > v_mac_f32_e32 v53, v14, v43 ; 3E6A570E > exp 15, 32, 0, 0, 0, v3, v4, v14, v25 ; F800020F 190E0403 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v3, v18, v45 ; 10065B12 > v_mul_f32_e32 v4, v18, v29 ; 10083B12 > v_mul_f32_e32 v14, v18, v30 ; 101C3D12 > v_mac_f32_e32 v3, v19, v49 ; 3E066313 > v_mac_f32_e32 v4, v19, v31 ; 3E083F13 > v_mac_f32_e32 v14, v19, v0 ; 3E1C0113 > v_mul_f32_e32 v18, v5, v45 ; 10245B05 > v_mul_f32_e32 v19, v5, v29 ; 10263B05 > v_mul_f32_e32 v5, v5, v30 ; 100A3D05 > v_mac_f32_e32 v18, v6, v49 ; 3E246306 > v_mac_f32_e32 v19, v6, v31 ; 3E263F06 > v_mac_f32_e32 v5, v6, v0 ; 3E0A0106 > v_mac_f32_e32 v18, v7, v43 ; 3E245707 > v_mac_f32_e32 v19, v7, v8 ; 3E261107 > v_mac_f32_e32 v5, v7, v11 ; 3E0A1707 > v_mul_f32_e32 v7, v15, v29 ; 100E3B0F > v_mac_f32_e32 v7, v16, v31 ; 3E0E3F10 > v_mac_f32_e32 v14, v20, v11 ; 3E1C1714 > v_add_f32_e32 v53, v10, v53 ; 066A6B0A > v_mul_f32_e32 v6, v15, v45 ; 100C5B0F > v_mac_f32_e32 v3, v20, v43 ; 3E065714 > v_mac_f32_e32 v4, v20, v8 ; 3E081114 > v_mac_f32_e32 v7, v17, v8 ; 3E0E1111 > v_mul_f32_e32 v8, v15, v30 ; 10103D0F > v_mul_f32_e32 v15, v14, v14 ; 101E1D0E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v13, s9, v53 ; 101A6A09 > v_mac_f32_e32 v15, v3, v3 ; 3E1E0703 > v_mac_f32_e32 v13, s8, v1 ; 3E1A0208 > v_mac_f32_e32 v15, v4, v4 ; 3E1E0904 > v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F > v_mac_f32_e32 v13, s10, v9 ; 3E1A120A > v_mac_f32_e32 v8, v16, v0 ; 3E100110 > v_add_f32_e32 v0, s11, v13 ; 06001A0B > v_mul_f32_e32 v13, s17, v53 ; 101A6A11 > v_mul_f32_e32 v54, s5, v53 ; 106C6A05 > v_mac_f32_e32 v13, s16, v1 ; 3E1A0210 > v_mac_f32_e32 v8, v17, v11 ; 3E101711 > v_mul_f32_e32 v11, s13, v53 ; 10166A0D > v_mac_f32_e32 v54, s4, v1 ; 3E6C0204 > v_mac_f32_e32 v6, v16, v49 ; 3E0C6310 > v_mac_f32_e32 v11, s12, v1 ; 3E16020C > v_mac_f32_e32 v13, s18, v9 ; 3E1A1212 > v_mac_f32_e32 v11, s14, v9 ; 3E16120E > v_mac_f32_e32 v54, s6, v9 ; 3E6C1206 > v_add_f32_e32 v1, s19, v13 ; 06021A13 > v_mul_f32_e32 v13, v5, v5 ; 101A0B05 > v_mul_f32_e32 v9, v14, v15 ; 
10121F0E > v_mac_f32_e32 v6, v17, v43 ; 3E0C5711 > v_mul_f32_e32 v14, v8, v8 ; 101C1108 > v_mac_f32_e32 v13, v18, v18 ; 3E1A2512 > v_mac_f32_e32 v14, v6, v6 ; 3E1C0D06 > v_mac_f32_e32 v13, v19, v19 ; 3E1A2713 > v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D > v_mul_f32_e32 v3, v3, v15 ; 10061F03 > v_mul_f32_e32 v4, v4, v15 ; 10081F04 > v_mul_f32_e32 v8, v8, v14 ; 10101D08 > v_mul_f32_e32 v5, v5, v13 ; 100A1B05 > v_mul_f32_e32 v15, s20, v9 ; 101E1214 > v_mul_f32_e32 v18, v18, v13 ; 10241B12 > v_mul_f32_e32 v16, s20, v5 ; 10200A14 > v_mul_f32_e32 v17, s20, v8 ; 10221014 > v_mul_f32_e32 v6, v6, v14 ; 100C1D06 > v_mul_f32_e32 v7, v7, v14 ; 100E1D07 > v_mul_f32_e32 v14, s23, v9 ; 101C1217 > v_mul_f32_e32 v9, s26, v9 ; 1012121A > v_mac_f32_e32 v15, s21, v3 ; 3E1E0615 > v_mac_f32_e32 v17, s21, v6 ; 3E220C15 > v_mac_f32_e32 v14, s24, v3 ; 3E1C0618 > v_mac_f32_e32 v9, s27, v3 ; 3E12061B > v_mul_f32_e32 v3, s23, v5 ; 10060A17 > v_mul_f32_e32 v13, v19, v13 ; 101A1B13 > v_mul_f32_e32 v19, s23, v8 ; 10261017 > v_mac_f32_e32 v16, s21, v18 ; 3E202415 > v_mac_f32_e32 v3, s24, v18 ; 3E062418 > v_mac_f32_e32 v19, s24, v6 ; 3E260C18 > v_mul_f32_e32 v5, s26, v5 ; 100A0A1A > v_mul_f32_e32 v8, s26, v8 ; 1010101A > v_mac_f32_e32 v15, s22, v4 ; 3E1E0816 > v_mac_f32_e32 v16, s22, v13 ; 3E201A16 > v_mac_f32_e32 v17, s22, v7 ; 3E220E16 > v_mac_f32_e32 v8, s27, v6 ; 3E100C1B > v_mac_f32_e32 v5, s27, v18 ; 3E0A241B > v_mac_f32_e32 v14, s25, v4 ; 3E1C0819 > v_mac_f32_e32 v3, s25, v13 ; 3E061A19 > v_mac_f32_e32 v19, s25, v7 ; 3E260E19 > exp 15, 33, 0, 0, 0, v15, v16, v17, v0 ; F800021F 0011100F > v_mac_f32_e32 v9, s28, v4 ; 3E12081C > v_mac_f32_e32 v5, s28, v13 ; 3E0A1A1C > v_mac_f32_e32 v8, s28, v7 ; 3E100E1C > exp 15, 34, 0, 0, 0, v14, v3, v19, v10 ; F800022F 0A13030E > v_mul_f32_e32 v4, s29, v21 ; 10082A1D > v_mul_f32_e32 v6, s30, v22 ; 100C2C1E > v_mul_f32_e32 v7, s31, v23 ; 100E2E1F > v_mul_f32_e32 v13, s0, v24 ; 101A3000 > exp 15, 35, 0, 0, 0, v9, v5, v8, v1 ; F800023F 01080509 > v_add_f32_e32 v12, s7, v54 ; 06186C07 > v_add_f32_e32 v11, s15, v11 ; 0616160F > exp 15, 36, 0, 0, 0, v4, v6, v7, v13 ; F800024F 0D070604 > exp 15, 12, 0, 1, 0, v12, v0, v11, v1 ; F80008CF 010B000C > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 56 >Code Size: 1640 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[28:31], s[2:3], 0x4 ; C08E0304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v12, v2, 0, 0, [m0] ; C8300002 > v_mov_b32_e32 v17, v13 ; 7E22030D > v_interp_p2_f32 v12, [v12], v3, 0, 0, [m0] ; C8310003 > v_interp_p1_f32 v13, v2, 1, 0, [m0] ; C8340102 > v_interp_p2_f32 v13, [v13], v3, 1, 0, [m0] ; C8350103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s36, s[28:31], 0x5d ; C2121D5D > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v14, v2, 1, 2, [m0] ; C8380902 > v_interp_p2_f32 v14, [v14], v3, 1, 2, [m0] ; C8390903 > v_interp_p1_f32 v15, v2, 2, 2, [m0] ; C83C0A02 > 
v_interp_p2_f32 v15, [v15], v3, 2, 2, [m0] ; C83D0A03 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > s_load_dwordx8 s[40:47], s[4:5], 0x10 ; C0D40510 > s_buffer_load_dword s6, s[28:31], 0x0 ; C2031D00 > s_buffer_load_dword s7, s[28:31], 0x1 ; C2039D01 > s_buffer_load_dword s8, s[28:31], 0x2 ; C2041D02 > s_buffer_load_dword s9, s[28:31], 0x3 ; C2049D03 > s_buffer_load_dword s11, s[28:31], 0x5c ; C2059D5C > s_buffer_load_dword s37, s[28:31], 0x5e ; C2129D5E > s_buffer_load_dword s38, s[28:31], 0x5f ; C2131D5F > s_buffer_load_dword s39, s[28:31], 0x60 ; C2139D60 > s_load_dwordx8 s[28:35], s[4:5], 0x0 ; C0CE0500 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v18, v2, 1, 3, [m0] ; C8480D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v20, s36 ; 7E285424 > v_interp_p2_f32 v18, [v18], v3, 1, 3, [m0] ; C8490D03 > v_interp_p1_f32 v19, v2, 2, 3, [m0] ; C84C0E02 > v_interp_p2_f32 v19, [v19], v3, 2, 3, [m0] ; C84D0E03 > v_interp_p1_f32 v2, v2, 3, 4, [m0] ; C8081302 > v_interp_p2_f32 v2, [v2], v3, 3, 4, [m0] ; C8091303 > v_bfrev_b32_e32 v0, 14 ; 7E00708E > v_cmp_neq_f32_e64 vcc, 0, s36 ; D01A006A 00004880 > v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 > s_and_b32 s24, s24, s35 ; 87182318 > v_cndmask_b32_e32 v0, v0, v20 ; 00002900 > v_add_f32_e64 v20, 1.0, s36 ; D2060014 000048F2 > s_and_b32 s20, s20, s47 ; 87142F14 > image_sample v[8:11], v[12:13], s[28:35], s[24:27] dmask:0xf ; F0800F00 00C7080C > v_fma_f32 v3, s11, v20, v3 ; D2960003 040E280B > image_sample v[20:21], v[12:13], s[40:47], s[20:23] dmask:0xa ; F0800A00 00AA140C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v20, v20, 2.0, -1.0 ; D2960014 03CDE914 > v_mad_f32 v0, v3, v0, -v0 ; D2820000 84020103 > v_fma_f32 v21, v21, 2.0, -1.0 ; D2960015 03CDE915 > v_fma_f32 v22, -v20, v20, 1.0 ; D2960016 23CA2914 > v_sub_f32_e32 v2, 0x3f7eb852, v2 ; 080404FF 3F7EB852 > v_fma_f32 v22, -v21, v21, v22 ; D2960016 245A2B15 > v_mul_f32_e32 v4, v20, v4 ; 10080914 > v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_ceil_f32_e32 v2, v2 ; 7E044502 > v_fma_f32 v1, -2.0, v0, v1 ; D2960001 040600F5 > v_mul_f32_e32 v0, v0, v0 ; 10000100 > v_mac_f32_e32 v4, v21, v5 ; 3E080B15 > v_mul_f32_e32 v5, v20, v7 ; 100A0F14 > v_sqrt_f32_e32 v22, v22 ; 7E2C6716 > v_mac_f32_e32 v4, v22, v6 ; 3E080D16 > v_mac_f32_e32 v5, v21, v14 ; 3E0A1D15 > v_mul_f32_e32 v6, v20, v16 ; 100C2114 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > v_fma_f32 v1, -v2, v0, 1.0 ; D2960001 23CA0102 > v_mac_f32_e32 v6, v21, v18 ; 3E0C2515 > v_mac_f32_e32 v5, v22, v15 ; 3E0A1F16 > v_mul_f32_e32 v7, v4, v4 ; 100E0904 > s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C > s_load_dwordx8 s[12:19], s[4:5], 0x20 ; C0C60520 > v_log_f32_e32 v1, v1 ; 7E024F01 > v_mac_f32_e32 v6, v22, v19 ; 3E0C2716 > v_mac_f32_e32 v7, v5, v5 ; 3E0E0B05 > v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 > v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 > v_mul_f32_e32 v3, v2, v0 ; 10060102 > v_mul_f32_e32 v0, s37, v1 ; 10000225 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_mul_f32_e32 v11, s38, v11 ; 10161626 > image_sample v[14:16], v[12:13], s[12:19], s[0:3] dmask:0x7 ; F0800700 00030E0C > v_mul_f32_e32 v2, s9, v0 ; 10040009 > v_mul_f32_e32 v3, v11, v3 ; 1006070B > v_mul_f32_e32 v4, v4, v7 ; 10080F04 > v_mul_f32_e32 v5, v5, v7 ; 100A0F05 > v_mul_f32_e32 v6, v6, v7 ; 100C0F06 > 
s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v12, s39, v16 ; 10182027 > v_mov_b32_e32 v13, v15 ; 7E1A030F > v_mul_f32_e32 v0, s6, v2 ; 10000406 > v_mul_f32_e32 v1, s7, v2 ; 10020407 > v_mul_f32_e32 v2, s8, v2 ; 10040408 > v_fma_f32 v4, v4, 0.5, 0.5 ; D2960004 03C1E104 > v_fma_f32 v5, v5, 0.5, 0.5 ; D2960005 03C1E105 > v_fma_f32 v6, v6, 0.5, 0.5 ; D2960006 03C1E106 > v_mov_b32_e32 v7, v3 ; 7E0E0303 > v_mov_b32_e32 v11, v3 ; 7E160303 > v_mov_b32_e32 v15, v3 ; 7E1E0303 > v_mov_b32_e32 v16, v17 ; 7E200311 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 > v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A > exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C > v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E > exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 24 >Code Size: 604 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** >radeonsi: Compiling shader 383 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 9, i32 0, i32 undef, i32 undef, i32 undef, i32 undef) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 384 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, 
i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 > s_buffer_load_dword s0, s[0:3], 0x15 ; C2000115 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v6, 0 ; 7E0C0280 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v1, v3, s4, s4 ; D2960001 00100903 > v_fma_f32 v5, v4, -s0, s0 ; D2960005 40000104 > exp 15, 32, 0, 0, 0, v1, v5, v6, v0 ; F800020F 00060501 > exp 15, 12, 0, 1, 0, v3, v4, v6, v0 ; F80008CF 00060403 > s_waitcnt expcnt(0) 
; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 88 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 385 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > call void @llvm.SI.export(i32 1, i32 1, i32 1, i32 0, i32 0, float %6, float undef, float undef, float undef) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_and_b32 s4, s4, s19 ; 87041304 > image_sample v0, v[0:1], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230000 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v2, s0 ; 7E040200 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v0 ; 7C040080 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[4:5], 0, s0 ; D0020004 00000080 > v_cndmask_b32_e64 v1, v2, 1.0, s[4:5] ; D2000001 0011E502 > v_cmp_le_f32_e32 vcc, 0, v1 ; 7C060280 > v_mul_f32_e32 v1, 0x70000000, v1 ; 100202FF 70000000 > v_bfrev_b32_e32 v2, 15 ; 7E04708F > v_cndmask_b32_e32 v1, v2, v1 ; 00020302 > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v1, v0 ; 7E025500 > v_mul_f32_e32 v1, s0, v1 ; 10020200 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_mov_b32_e32 v0, 0x4b189680 ; 7E0002FF 4B189680 > v_cndmask_b32_e32 v0, v0, v1 ; 00000300 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 > v_mov_b32_e32 v3, v0 ; 7E060300 >Shader epilog disassembly: > exp 1, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001801 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 176 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 > v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 > v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 > v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 >Shader epilog disassembly: > exp 1, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001801 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 15 >Code Size: 32 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 
s[0:3], s[2:3], 0x4 ; C0800304 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v5, 0xbfc00000 ; 7E0A02FF BFC00000 > v_mov_b32_e32 v6, 0x3fc00000 ; 7E0C02FF 3FC00000 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v7, s4 ; 7E0E5404 > v_rcp_f32_e32 v8, s0 ; 7E105400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_cndmask_b32_e32 v7, v7, v1 ; 000E0307 > v_cndmask_b32_e64 v1, v8, v1, s[0:1] ; D2000001 00020308 > v_fma_f32 v8, v3, 0.5, 0.5 ; D2960008 03C1E103 > v_fma_f32 v9, v4, -0.5, 0.5 ; D2960009 03C1E304 > v_fma_f32 v10, v7, v5, v8 ; D296000A 04220B07 > v_fma_f32 v5, v1, v5, v9 ; D2960005 04260B01 > v_fma_f32 v11, v7, -0.5, v8 ; D296000B 0421E307 > v_fma_f32 v12, v7, 0.5, v8 ; D296000C 0421E107 > v_fma_f32 v7, v7, v6, v8 ; D2960007 04220D07 > exp 15, 32, 0, 0, 0, v10, v5, v11, v5 ; F800020F 050B050A > v_fma_f32 v8, v1, -0.5, v9 ; D2960008 0425E301 > exp 15, 33, 0, 0, 0, v12, v5, v7, v5 ; F800021F 0507050C > exp 15, 34, 0, 0, 0, v10, v8, v11, v8 ; F800022F 080B080A > v_fma_f32 v13, v1, 0.5, v9 ; D296000D 0425E101 > exp 15, 35, 0, 0, 0, v12, v8, v7, v8 ; F800023F 0807080C > exp 15, 36, 0, 0, 0, v10, v13, v11, v13 ; F800024F 0D0B0D0A > v_fma_f32 v1, v1, v6, v9 ; D2960001 04260D01 > exp 15, 37, 0, 0, 0, v12, v13, v7, v13 ; F800025F 0D070D0C > exp 15, 38, 0, 0, 0, v10, v1, v11, v1 ; F800026F 010B010A > exp 15, 39, 0, 0, 0, v12, v1, v7, v1 ; F800027F 0107010C > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 268 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 386 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 0, i32 0, float %6, float undef, float undef, float undef) > call void @llvm.SI.export(i32 1, i32 1, i32 1, i32 1, i32 0, float %10, float undef, float undef, float undef) > ret void >} > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } > > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v7, v2, 0, 0, [m0] ; C81C0002 > v_interp_p2_f32 v7, [v7], v3, 0, 0, [m0] ; C81D0003 > v_interp_p1_f32 v8, v2, 1, 0, [m0] ; C8200102 > v_interp_p2_f32 v8, [v8], v3, 1, 0, [m0] ; C8210103 > v_interp_p1_f32 v9, v2, 2, 0, [m0] ; C8240202 > v_interp_p2_f32 v9, [v9], v3, 2, 0, [m0] ; C8250203 > v_interp_p1_f32 v10, v2, 3, 0, [m0] ; C8280302 > v_interp_p2_f32 v10, [v10], v3, 3, 0, [m0] ; C8290303 > v_interp_p1_f32 v11, v2, 0, 1, [m0] ; C82C0402 > v_interp_p2_f32 v11, [v11], v3, 0, 1, [m0] ; C82D0403 > v_interp_p1_f32 v12, v2, 1, 1, [m0] ; C8300502 > v_interp_p2_f32 v12, [v12], v3, 1, 1, [m0] ; C8310503 > v_interp_p1_f32 
v14, v2, 2, 1, [m0] ; C8380602 > v_interp_p2_f32 v14, [v14], v3, 2, 1, [m0] ; C8390603 > v_interp_p1_f32 v15, v2, 3, 1, [m0] ; C83C0702 > v_interp_p2_f32 v15, [v15], v3, 3, 1, [m0] ; C83D0703 > v_interp_p1_f32 v16, v2, 0, 2, [m0] ; C8400802 > v_interp_p2_f32 v16, [v16], v3, 0, 2, [m0] ; C8410803 > v_interp_p1_f32 v17, v2, 1, 2, [m0] ; C8440902 > v_interp_p2_f32 v17, [v17], v3, 1, 2, [m0] ; C8450903 > v_interp_p1_f32 v18, v2, 2, 2, [m0] ; C8480A02 > v_interp_p2_f32 v18, [v18], v3, 2, 2, [m0] ; C8490A03 > v_interp_p1_f32 v19, v2, 3, 2, [m0] ; C84C0B02 > v_interp_p2_f32 v19, [v19], v3, 3, 2, [m0] ; C84D0B03 > v_interp_p1_f32 v20, v2, 0, 3, [m0] ; C8500C02 > v_interp_p2_f32 v20, [v20], v3, 0, 3, [m0] ; C8510C03 > v_interp_p1_f32 v21, v2, 1, 3, [m0] ; C8540D02 > v_interp_p2_f32 v21, [v21], v3, 1, 3, [m0] ; C8550D03 > v_interp_p1_f32 v22, v2, 2, 3, [m0] ; C8580E02 > v_interp_p2_f32 v22, [v22], v3, 2, 3, [m0] ; C8590E03 > v_interp_p1_f32 v23, v2, 3, 3, [m0] ; C85C0F02 > v_interp_p2_f32 v23, [v23], v3, 3, 3, [m0] ; C85D0F03 > v_interp_p1_f32 v24, v2, 0, 4, [m0] ; C8601002 > v_interp_p2_f32 v24, [v24], v3, 0, 4, [m0] ; C8611003 > v_interp_p1_f32 v25, v2, 1, 4, [m0] ; C8641102 > v_interp_p2_f32 v25, [v25], v3, 1, 4, [m0] ; C8651103 > v_interp_p1_f32 v26, v2, 2, 4, [m0] ; C8681202 > v_interp_p2_f32 v26, [v26], v3, 2, 4, [m0] ; C8691203 > v_interp_p1_f32 v27, v2, 3, 4, [m0] ; C86C1302 > v_interp_p2_f32 v27, [v27], v3, 3, 4, [m0] ; C86D1303 > v_interp_p1_f32 v28, v2, 0, 5, [m0] ; C8701402 > v_interp_p2_f32 v28, [v28], v3, 0, 5, [m0] ; C8711403 > v_interp_p1_f32 v29, v2, 1, 5, [m0] ; C8741502 > v_interp_p2_f32 v29, [v29], v3, 1, 5, [m0] ; C8751503 > v_interp_p1_f32 v30, v2, 2, 5, [m0] ; C8781602 > v_interp_p2_f32 v30, [v30], v3, 2, 5, [m0] ; C8791603 > v_interp_p1_f32 v31, v2, 3, 5, [m0] ; C87C1702 > v_interp_p2_f32 v31, [v31], v3, 3, 5, [m0] ; C87D1703 > v_interp_p1_f32 v32, v2, 0, 6, [m0] ; C8801802 > v_interp_p2_f32 v32, [v32], v3, 0, 6, [m0] ; C8811803 > v_interp_p1_f32 v33, v2, 1, 6, [m0] ; C8841902 > v_interp_p2_f32 v33, [v33], v3, 1, 6, [m0] ; C8851903 > v_interp_p1_f32 v34, v2, 2, 6, [m0] ; C8881A02 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > v_interp_p2_f32 v34, [v34], v3, 2, 6, [m0] ; C8891A03 > v_interp_p1_f32 v35, v2, 3, 6, [m0] ; C88C1B02 > v_interp_p2_f32 v35, [v35], v3, 3, 6, [m0] ; C88D1B03 > v_interp_p1_f32 v36, v2, 0, 7, [m0] ; C8901C02 > v_interp_p2_f32 v36, [v36], v3, 0, 7, [m0] ; C8911C03 > v_interp_p1_f32 v37, v2, 1, 7, [m0] ; C8941D02 > v_interp_p2_f32 v37, [v37], v3, 1, 7, [m0] ; C8951D03 > v_interp_p1_f32 v38, v2, 2, 7, [m0] ; C8981E02 > v_interp_p2_f32 v38, [v38], v3, 2, 7, [m0] ; C8991E03 > v_interp_p1_f32 v39, v2, 3, 7, [m0] ; C89C1F02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v39, [v39], v3, 3, 7, [m0] ; C89D1F03 > image_sample v0, v[7:8], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030007 > image_sample v2, v[9:10], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030209 > image_sample v3, v[11:12], s[12:19], s[0:3] dmask:0x1 ; F0800100 0003030B > image_sample v7, v[14:15], s[12:19], s[0:3] dmask:0x1 ; F0800100 0003070E > image_sample v8, v[16:17], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030810 > image_sample v9, v[18:19], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030912 > image_sample v10, v[20:21], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030A14 > image_sample v11, v[22:23], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030B16 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v0, v8, v0 ; 
06000108 > image_sample v8, v[24:25], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030818 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v2, v9, v2 ; 06040509 > image_sample v9, v[26:27], s[12:19], s[0:3] dmask:0x1 ; F0800100 0003091A > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v3, v10, v3 ; 0606070A > image_sample v10, v[28:29], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030A1C > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v7, v11, v7 ; 060E0F0B > image_sample v11, v[30:31], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030B1E > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v0, v8, v0 ; 06000108 > image_sample v8, v[32:33], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030820 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v2, v9, v2 ; 06040509 > image_sample v9, v[34:35], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030922 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_add_f32_e32 v8, v8, v0 ; 06100108 > v_mov_b32_e32 v1, 0x3d800000 ; 7E0202FF 3D800000 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v0, v9, v2 ; 06000509 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > v_add_f32_e32 v3, v10, v3 ; 0606070A > image_sample v10, v[36:37], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030A24 > v_add_f32_e32 v7, v11, v7 ; 060E0F0B > image_sample v11, v[38:39], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030B26 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_add_f32_e32 v2, v10, v3 ; 0604070A > v_mac_f32_e32 v0, v1, v8 ; 3E001101 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v3, v11, v7 ; 06060F0B > v_mac_f32_e32 v0, v1, v2 ; 3E000501 > v_mac_f32_e32 v0, v1, v3 ; 3E000701 > v_mov_b32_e32 v4, 0 ; 7E080280 > v_mov_b32_e32 v5, 0 ; 7E0A0280 > v_mov_b32_e32 v6, 0 ; 7E0C0280 > v_mov_b32_e32 v7, 0 ; 7E0E0280 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 > v_mov_b32_e32 v3, v0 ; 7E060300 >Shader epilog disassembly: > exp 1, 0, 0, 0, 0, v0, v0, v0, v0 ; F8000001 00000000 > exp 1, 1, 0, 1, 1, v4, v0, v0, v0 ; F8001811 00000004 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 40 >Code Size: 568 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 6 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > image_sample v4, v[0:1], s[12:19], s[0:3] dmask:0x8 ; 
F0800800 00030400 > v_mov_b32_e32 v0, 0 ; 7E000280 > v_mov_b32_e32 v1, 0 ; 7E020280 > v_mov_b32_e32 v2, 0 ; 7E040280 > v_mov_b32_e32 v3, 0 ; 7E060280 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 >Shader epilog disassembly: > exp 1, 0, 0, 0, 0, v0, v0, v0, v0 ; F8000001 00000000 > exp 1, 1, 0, 1, 1, v4, v0, v0, v0 ; F8001811 00000004 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 100 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 0x3e800000 ; 7E0002FF 3E800000 > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s5, s[0:3], 0x35 ; C2028135 > s_buffer_load_dword s6, s[0:3], 0x36 ; C2030136 > s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138 > s_buffer_load_dword s10, s[0:3], 0x39 ; C2050139 > s_buffer_load_dword s13, s[0:3], 0x3c ; C206813C > s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 > s_buffer_load_dword s7, s[0:3], 0x37 ; C2038137 > s_buffer_load_dword s9, s[0:3], 0x41 ; C2048141 > s_buffer_load_dword s11, s[0:3], 0x3a ; C205813A > s_buffer_load_dword s12, s[0:3], 0x3b ; C206013B > s_buffer_load_dword s0, s[0:3], 0x3d ; C200013D > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mov_b32_e32 v5, s5 ; 7E0A0205 > v_mov_b32_e32 v6, s10 ; 7E0C020A > v_mov_b32_e32 v7, s8 ; 7E0E0208 > v_mac_f32_e32 v6, s9, v7 ; 3E0C0E09 > v_mov_b32_e32 v7, s9 ; 7E0E0209 > v_add_f32_e32 v10, 1.0, v4 ; 061408F2 > v_fma_f32 v8, v3, s13, s13 ; D2960008 00341B03 > v_fma_f32 v9, v4, -s0, s0 ; D2960009 40000104 > v_mad_f32 v11, v3, s6, s6 ; D282000B 00180D03 > v_mul_f32_e32 v12, s11, v8 ; 1018100B > v_fma_f32 v10, -v10, 0.5, 1.0 ; D296000A 23C9E10A > v_mul_f32_e32 v13, s12, v9 ; 101A120C > v_fma_f32 v11, v11, 0.5, s4 ; D296000B 0011E10B > v_mul_f32_e32 v12, v0, v12 ; 10181900 > v_mul_f32_e32 v0, v0, v13 ; 10001B00 > v_fma_f32 v10, s7, v10, v5 ; D296000A 04161407 > v_mov_b32_e32 v13, s4 ; 7E1A0204 > v_mul_f32_e32 v11, v6, v11 ; 10161706 > v_mul_f32_e32 v6, v6, v10 ; 100C1506 > exp 15, 32, 0, 0, 0, v8, v9, v12, v0 ; F800020F 000C0908 > v_fma_f32 v13, s6, v8, v13 ; D296000D 04361006 > v_fma_f32 v5, s7, v9, v5 ; D2960005 04161207 > exp 15, 33, 0, 0, 0, v11, v6, v7, v0 ; F800021F 0007060B > exp 15, 34, 0, 0, 0, v13, v5, v0, v0 ; F800022F 0000050D > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 0 ; 7E000280 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 248 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 
> s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C > s_load_dwordx8 s[48:55], s[4:5], 0x0 ; C0D80500 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C > s_load_dwordx8 s[36:43], s[4:5], 0x10 ; C0D20510 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 2, 1, [m0] ; C8200602 > v_interp_p2_f32 v8, [v8], v3, 2, 1, [m0] ; C8210603 > v_interp_p1_f32 v9, v2, 0, 2, [m0] ; C8240802 > v_interp_p2_f32 v9, [v9], v3, 0, 2, [m0] ; C8250803 > v_interp_p1_f32 v10, v2, 1, 2, [m0] ; C8280902 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s44, s44, s55 ; 872C372C > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; C08C0304 > v_interp_p2_f32 v10, [v10], v3, 1, 2, [m0] ; C8290903 > image_sample v[14:16], v[0:1], s[48:55], s[44:47] dmask:0x7 ; F0800700 016C0E00 > s_and_b32 s32, s32, s43 ; 87202B20 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v14, 2.0, -1.0 ; D2960002 03CDE90E > image_sample v[11:12], v[4:5], s[36:43], s[32:35] dmask:0x3 ; F0800300 01090B04 > v_fma_f32 v3, v15, 2.0, -1.0 ; D2960003 03CDE90F > v_mul_f32_e32 v5, v2, v2 ; 100A0502 > v_fma_f32 v4, v16, 2.0, -1.0 ; D2960004 03CDE910 > v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 > s_load_dwordx4 s[28:31], s[4:5], 0x2c ; C08E052C > s_load_dwordx8 s[44:51], s[4:5], 0x20 ; C0D60520 > v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 > v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s58, s[24:27], 0x2c ; C21D192C > s_buffer_load_dword s59, s[24:27], 0x2d ; C21D992D > s_buffer_load_dword s60, s[24:27], 0x2e ; C21E192E > s_buffer_load_dword s23, s[24:27], 0x28 ; C20B9928 > s_buffer_load_dword s56, s[24:27], 0x29 ; C21C1929 > s_buffer_load_dword s57, s[24:27], 0x2a ; C21C992A > s_buffer_load_dword s61, s[24:27], 0x30 ; C21E9930 > s_and_b32 s28, s28, s51 ; 871C331C > s_buffer_load_dword s62, s[24:27], 0x31 ; C21F1931 > s_buffer_load_dword s63, s[24:27], 0x32 ; C21F9932 > image_sample v0, v[0:1], s[44:51], s[28:31] dmask:0x1 ; F0800100 00EB0000 > v_mul_f32_e32 v1, v3, v5 ; 10020B03 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s58, v1 ; 1006023A > v_mul_f32_e32 v14, s59, v1 ; 101C023B > v_mul_f32_e32 v2, v2, v5 ; 10040B02 > v_mul_f32_e32 v1, s60, v1 ; 1002023C > v_fma_f32 v3, v2, s23, v3 ; D2960003 040C2F02 > v_fma_f32 v14, v2, s56, v14 ; D296000E 04387102 > v_fma_f32 v1, v2, s57, v1 ; D2960001 04047302 > v_mul_f32_e32 v2, v4, v5 ; 10040B04 > v_fma_f32 v3, v2, s61, v3 ; D2960003 040C7B02 > v_fma_f32 v4, v2, s62, v14 ; D2960004 04387D02 > v_mul_f32_e32 v5, v3, v3 ; 100A0703 > v_fma_f32 v1, v2, s63, v1 ; D2960001 04047F02 > v_mul_f32_e32 v2, v6, v6 ; 10040D06 > v_mac_f32_e32 v2, v7, v7 ; 3E040F07 > v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 > v_mac_f32_e32 v2, v8, v8 ; 3E041108 > v_mac_f32_e32 v5, v1, v1 ; 3E0A0301 > v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 > v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 > s_buffer_load_dword s7, s[24:27], 0x18 ; C2039918 > s_buffer_load_dword s20, s[24:27], 0x19 ; C20A1919 > v_mul_f32_e32 v6, v6, v2 ; 100C0506 > v_mul_f32_e32 v7, v7, v2 ; 100E0507 > v_mul_f32_e32 v8, v8, v2 ; 10100508 > v_mul_f32_e32 v2, v3, v5 ; 10040B03 > v_mul_f32_e32 v4, v4, v5 ; 10080B04 > v_mul_f32_e32 v3, v2, v6 ; 10060D02 > v_mul_f32_e32 v1, v1, v5 ; 10020B01 > v_mac_f32_e32 v3, v4, v7 ; 3E060F04 > v_mac_f32_e32 v3, v1, v8 ; 3E061101 > v_add_f32_e32 v3, v3, v3 ; 06060703 > v_fma_f32 v5, v2, -v3, v6 ; D2960005 441A0702 
> v_fma_f32 v6, v4, -v3, v7 ; D2960006 441E0704 > v_fma_f32 v5, v5, 2.0, v2 ; D2960005 0409E905 > v_fma_f32 v3, v1, -v3, v8 ; D2960003 44220701 > v_fma_f32 v6, v6, 2.0, v4 ; D2960006 0411E906 > v_mul_f32_e32 v7, v5, v5 ; 100E0B05 > v_fma_f32 v3, v3, 2.0, v1 ; D2960003 0405E903 > v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 > v_mac_f32_e32 v7, v3, v3 ; 3E0E0703 > v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_fma_f32 v8, v11, 2.0, -1.0 ; D2960008 03CDE90B > v_fma_f32 v11, v12, 2.0, -1.0 ; D296000B 03CDE90C > v_mul_f32_e32 v12, v11, v11 ; 1018170B > v_mac_f32_e32 v12, v8, v8 ; 3E181108 > v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C > v_mul_f32_e32 v33, v3, v7 ; 10420F03 > v_mul_f32_e32 v41, v5, v7 ; 10520F05 > v_mul_f32_e32 v40, v6, v7 ; 10500F06 > v_mul_f32_e32 v5, v33, v4 ; 100A0921 > v_mul_f32_e32 v6, v41, v1 ; 100C0329 > v_fma_f32 v5, v40, v1, -v5 ; D2960005 84160328 > v_mul_f32_e32 v17, v8, v12 ; 10221908 > v_fma_f32 v8, v33, v2, -v6 ; D2960008 841A0521 > v_mul_f32_e32 v6, v5, v5 ; 100C0B05 > v_mul_f32_e32 v7, v40, v2 ; 100E0528 > v_mul_f32_e32 v18, v11, v12 ; 1024190B > v_fma_f32 v11, v41, v4, -v7 ; D296000B 841E0929 > v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 > v_mac_f32_e32 v6, v11, v11 ; 3E0C170B > v_rsq_clamp_f32_e32 v12, v6 ; 7E185906 > v_mov_b32_e32 v3, 0x3dcccccd ; 7E0602FF 3DCCCCCD > s_buffer_load_dword s2, s[24:27], 0x1a ; C201191A > s_buffer_load_dword s3, s[24:27], 0x1b ; C201991B > s_buffer_load_dword s0, s[24:27], 0x1c ; C200191C > s_buffer_load_dword s9, s[24:27], 0x1d ; C204991D > s_buffer_load_dword s1, s[24:27], 0x1e ; C200991E > s_buffer_load_dword s21, s[24:27], 0x1f ; C20A991F > s_buffer_load_dword s11, s[24:27], 0x24 ; C2059924 > s_buffer_load_dword s22, s[24:27], 0x25 ; C20B1925 > s_buffer_load_dword s8, s[24:27], 0x26 ; C2041926 > s_buffer_load_dword s6, s[24:27], 0x27 ; C2031927 > s_buffer_load_dword s28, s[24:27], 0x38 ; C20E1938 > s_buffer_load_dword s24, s[24:27], 0x39 ; C20C1939 > v_mul_f32_e32 v25, v3, v18 ; 10322503 > v_mov_b32_e32 v6, 0xbdcccccd ; 7E0C02FF BDCCCCCD > v_mad_f32 v15, v17, v6, v25 ; D282000F 04660D11 > v_mul_f32_e32 v6, v5, v12 ; 100C1905 > v_mul_f32_e32 v5, v11, v12 ; 100A190B > v_mul_f32_e32 v7, v8, v12 ; 100E1908 > v_mul_f32_e32 v8, v5, v40 ; 10105105 > v_fma_f32 v44, v7, v33, -v8 ; D296002C 84224307 > v_mul_f32_e32 v11, v6, v33 ; 10164306 > v_fma_f32 v8, v44, v15, v41 ; D2960008 04A61F2C > v_mac_f32_e32 v25, v3, v17 ; 3E322303 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v14, s24 ; 7E1C0218 > v_fma_f32 v43, v5, v41, -v11 ; D296002B 842E5305 > v_fma_f32 v3, v6, v25, v8 ; D2960003 04223306 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, 0x3b83126f, v0 ; 101000FF 3B83126F > v_fma_f32 v11, v43, v15, v40 ; D296000B 04A21F2B > v_mul_f32_e32 v12, v7, v41 ; 10185307 > v_mac_f32_e32 v14, s28, v0 ; 3E1C001C > v_max_f32_e32 v39, 1.0, v8 ; 204E10F2 > v_fma_f32 v42, v6, v40, -v12 ; D296002A 84325106 > v_mul_f32_e32 v8, v9, v14 ; 10101D09 > v_mul_f32_e32 v9, v10, v14 ; 10121D0A > v_mul_f32_e32 v10, 0.5, v39 ; 10144EF0 > v_fma_f32 v11, v7, v25, v11 ; D296000B 042E3307 > v_fma_f32 v12, v42, v15, v33 ; D296000C 04861F2A > v_fma_f32 v11, v11, v10, v9 ; D296000B 0426150B > v_fma_f32 v12, v5, v25, v12 ; D296000C 04323305 > v_fma_f32 v3, v3, v10, v8 ; D2960003 04221503 > v_mul_f32_e32 v14, s20, v11 ; 101C1614 > v_fma_f32 v12, v12, v10, v0 ; D296000C 0402150C > v_mac_f32_e32 v14, s7, v3 ; 3E1C0607 > v_mac_f32_e32 v14, s2, v12 ; 3E1C1802 > v_add_f32_e32 v14, s3, v14 ; 061C1C03 > v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80 > v_cndmask_b32_e64 v15, 
v14, 1.0, vcc ; D200000F 01A9E50E > v_bfrev_b32_e32 v16, 14 ; 7E20708E > v_mul_f32_e32 v19, v16, v15 ; 10261F10 > v_bfrev_b32_e32 v20, 15 ; 7E28708F > v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 > v_cndmask_b32_e32 v15, v20, v19 ; 001E2714 > v_mul_f32_e32 v19, s9, v11 ; 10261609 > v_mul_f32_e32 v11, s22, v11 ; 10161616 > v_mac_f32_e32 v11, s11, v3 ; 3E16060B > v_mac_f32_e32 v19, s0, v3 ; 3E260600 > v_mac_f32_e32 v11, s8, v12 ; 3E161808 > v_mac_f32_e32 v19, s1, v12 ; 3E261801 > v_add_f32_e32 v3, s6, v11 ; 06061606 > v_add_f32_e32 v19, s21, v19 ; 06262615 > v_rcp_f32_e32 v11, v3 ; 7E165503 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > s_load_dwordx8 s[12:19], s[4:5], 0x30 ; C0C60530 > s_load_dwordx4 s[24:27], s[4:5], 0x3c ; C08C053C > v_cndmask_b32_e64 v21, v19, 1.0, vcc ; D2000015 01A9E513 > v_mul_f32_e32 v16, v16, v21 ; 10202B10 > v_cmp_le_f32_e32 vcc, 0, v21 ; 7C062A80 > v_cndmask_b32_e32 v16, v20, v16 ; 00202114 > v_mul_f32_e32 v14, v11, v14 ; 101C1D0B > v_cmp_eq_f32_e32 vcc, 0, v3 ; 7C040680 > v_mul_f32_e32 v11, v11, v19 ; 1016270B > v_cndmask_b32_e32 v3, v14, v15 ; 00061F0E > v_cndmask_b32_e32 v11, v11, v16 ; 0016210B > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s19 ; 87181318 > v_fma_f32 v14, v3, 0.5, 0.5 ; D296000E 03C1E103 > v_fma_f32 v15, v11, -0.5, 0.5 ; D296000F 03C1E30B > image_sample v3, v[14:15], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3030E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v11, v3, v12 ; 0A161903 > v_mul_f32_e32 v3, 0x3e800000, v39 ; 10064EFF 3E800000 > v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 > v_cndmask_b32_e32 v3, v10, v3 ; 0006070A > v_mul_f32_e32 v10, 0.5, v3 ; 101406F0 > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > v_cndmask_b32_e32 v10, v3, v10 ; 00141503 > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v3, v3, v10 ; 00061503 > v_sub_f32_e64 v10, |v11|, v3 ; D208010A 0002070B > v_cmp_eq_f32_e32 vcc, 0, v3 ; 7C040680 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 > v_cndmask_b32_e64 v11, v10, 1.0, vcc ; D200000B 01A9E50A > v_cmp_le_f32_e32 vcc, 0, v11 ; 7C061680 > v_mul_f32_e32 v11, 0x70000000, v11 ; 101616FF 70000000 > v_bfrev_b32_e32 v12, 15 ; 7E18708F > v_cndmask_b32_e32 v11, v12, v11 ; 0016170C > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v3, v3 ; 7E065503 > v_mul_f32_e32 v11, v3, v10 ; 10161503 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v14, 0xbe9eb852 ; 7E1C02FF BE9EB852 > v_mul_f32_e32 v21, v17, v14 ; 102A1D11 > v_mul_f32_e32 v12, 0x3e9eb852, v18 ; 101824FF 3E9EB852 > v_fma_f32 v15, v43, v21, v40 ; D296000F 04A22B2B > v_fma_f32 v14, v44, v21, v41 ; D296000E 04A62B2C > v_mul_f32_e32 v10, 0x3fa66666, v39 ; 10144EFF 3FA66666 > v_fma_f32 v15, v7, v12, v15 ; D296000F 043E1907 > v_fma_f32 v14, v6, v12, v14 ; D296000E 043A1906 > v_fma_f32 v16, v42, v21, v33 ; D2960010 04862B2A > v_fma_f32 v15, v15, v10, v9 ; D296000F 0426150F > v_fma_f32 v12, v5, v12, v16 ; D296000C 04421905 > v_fma_f32 v14, v14, v10, v8 ; D296000E 0422150E > v_mul_f32_e32 v16, s20, v15 ; 10201E14 > v_mul_f32_e32 v19, s9, v15 ; 10261E09 > v_mul_f32_e32 v15, s22, v15 ; 101E1E16 > v_fma_f32 v12, v12, v10, v0 ; D296000C 0402150C > v_mac_f32_e32 v16, s7, v14 ; 3E201C07 > v_mac_f32_e32 v15, s11, v14 ; 3E1E1C0B > v_mac_f32_e32 v16, s2, v12 ; 3E201802 > v_mac_f32_e32 v19, s0, v14 ; 3E261C00 > v_mac_f32_e32 v15, s8, v12 ; 3E1E1808 > v_mac_f32_e32 v19, s1, v12 ; 3E261801 > v_add_f32_e32 v16, s3, v16 ; 06202003 > 
v_add_f32_e32 v14, s6, v15 ; 061C1E06 > v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 > v_add_f32_e32 v19, s21, v19 ; 06262615 > v_rcp_f32_e32 v24, v14 ; 7E30550E > v_cndmask_b32_e64 v15, v16, 1.0, vcc ; D200000F 01A9E510 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_bfrev_b32_e32 v22, 14 ; 7E2C708E > v_cndmask_b32_e64 v20, v19, 1.0, vcc ; D2000014 01A9E513 > v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 > v_mul_f32_e32 v15, v22, v15 ; 101E1F16 > v_bfrev_b32_e32 v23, 15 ; 7E2E708F > v_cndmask_b32_e32 v15, v23, v15 ; 001E1F17 > v_cmp_le_f32_e32 vcc, 0, v20 ; 7C062880 > v_mul_f32_e32 v20, v22, v20 ; 10282916 > v_cndmask_b32_e32 v20, v23, v20 ; 00282917 > v_cmp_eq_f32_e32 vcc, 0, v14 ; 7C041C80 > v_mul_f32_e32 v14, v24, v16 ; 101C2118 > v_mul_f32_e32 v16, v24, v19 ; 10202718 > v_cndmask_b32_e32 v14, v14, v15 ; 001C1F0E > v_cndmask_b32_e32 v15, v16, v20 ; 001E2910 > v_fma_f32 v19, v14, 0.5, 0.5 ; D2960013 03C1E10E > v_fma_f32 v20, v15, -0.5, 0.5 ; D2960014 03C1E30F > image_sample v14, v[19:20], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C30E13 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, v14, v12 ; 0A18190E > v_add_f32_e64 v3, 0, v11 clamp ; D2060803 00021680 > v_mul_f32_e32 v11, 0x3f266666, v39 ; 10164EFF 3F266666 > v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 > v_cndmask_b32_e32 v10, v10, v11 ; 0014170A > v_mul_f32_e32 v11, 0.5, v10 ; 101614F0 > v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 > v_cndmask_b32_e32 v11, v10, v11 ; 0016170A > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v10, v10, v11 ; 0014170A > v_sub_f32_e64 v11, |v12|, v10 ; D208010B 0002150C > v_cmp_eq_f32_e32 vcc, 0, v10 ; 7C041480 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > v_cndmask_b32_e64 v12, v11, 1.0, vcc ; D200000C 01A9E50B > v_cmp_le_f32_e32 vcc, 0, v12 ; 7C061880 > v_mul_f32_e32 v12, 0x70000000, v12 ; 101818FF 70000000 > v_bfrev_b32_e32 v14, 15 ; 7E1C708F > v_cndmask_b32_e32 v12, v14, v12 ; 0018190E > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v10, v10 ; 7E14550A > v_mul_f32_e32 v12, v10, v11 ; 1018170A > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v11, 0x3c23d70a ; 7E1602FF 3C23D70A > v_add_f32_e64 v10, 0, v12 clamp ; D206080A 00021880 > v_mul_f32_e32 v12, v11, v18 ; 1018250B > v_mac_f32_e32 v12, 0x3e8a3d71, v17 ; 3E1822FF 3E8A3D71 > v_mul_f32_e32 v22, v11, v17 ; 102C230B > v_mov_b32_e32 v11, 0xbe8a3d71 ; 7E1602FF BE8A3D71 > v_mac_f32_e32 v22, v18, v11 ; 3E2C1712 > v_fma_f32 v14, v43, v12, v40 ; D296000E 04A2192B > v_fma_f32 v11, v44, v12, v41 ; D296000B 04A6192C > v_fma_f32 v14, v7, v22, v14 ; D296000E 043A2D07 > v_mul_f32_e32 v15, 0x40133333, v39 ; 101E4EFF 40133333 > v_fma_f32 v12, v42, v12, v33 ; D296000C 0486192A > v_fma_f32 v11, v6, v22, v11 ; D296000B 042E2D06 > v_fma_f32 v14, v14, v15, v9 ; D296000E 04261F0E > v_fma_f32 v12, v5, v22, v12 ; D296000C 04322D05 > v_fma_f32 v11, v11, v15, v8 ; D296000B 04221F0B > v_mul_f32_e32 v16, s20, v14 ; 10201C14 > v_mul_f32_e32 v19, s9, v14 ; 10261C09 > v_mul_f32_e32 v14, s22, v14 ; 101C1C16 > v_fma_f32 v12, v12, v15, v0 ; D296000C 04021F0C > v_mac_f32_e32 v16, s7, v11 ; 3E201607 > v_mac_f32_e32 v14, s11, v11 ; 3E1C160B > v_mac_f32_e32 v16, s2, v12 ; 3E201802 > v_mac_f32_e32 v19, s0, v11 ; 3E261600 > v_mac_f32_e32 v14, s8, v12 ; 3E1C1808 > v_mac_f32_e32 v19, s1, v12 ; 3E261801 > v_add_f32_e32 v16, s3, v16 ; 06202003 > v_add_f32_e32 v11, s6, v14 ; 06161C06 > v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 > v_add_f32_e32 v19, 
s21, v19 ; 06262615 > v_rcp_f32_e32 v26, v11 ; 7E34550B > v_cndmask_b32_e64 v14, v16, 1.0, vcc ; D200000E 01A9E510 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_bfrev_b32_e32 v23, 14 ; 7E2E708E > v_cndmask_b32_e64 v20, v19, 1.0, vcc ; D2000014 01A9E513 > v_cmp_le_f32_e32 vcc, 0, v14 ; 7C061C80 > v_mul_f32_e32 v14, v23, v14 ; 101C1D17 > v_bfrev_b32_e32 v24, 15 ; 7E30708F > v_cndmask_b32_e32 v14, v24, v14 ; 001C1D18 > v_cmp_le_f32_e32 vcc, 0, v20 ; 7C062880 > v_mul_f32_e32 v20, v23, v20 ; 10282917 > v_cndmask_b32_e32 v20, v24, v20 ; 00282918 > v_cmp_eq_f32_e32 vcc, 0, v11 ; 7C041680 > v_mul_f32_e32 v11, v26, v16 ; 1016211A > v_mul_f32_e32 v16, v26, v19 ; 1020271A > v_cndmask_b32_e32 v11, v11, v14 ; 00161D0B > v_cndmask_b32_e32 v14, v16, v20 ; 001C2910 > v_fma_f32 v19, v11, 0.5, 0.5 ; D2960013 03C1E10B > v_fma_f32 v20, v14, -0.5, 0.5 ; D2960014 03C1E30E > image_sample v11, v[19:20], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C30B13 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, v11, v12 ; 0A18190B > v_mul_f32_e32 v11, 0x3f933333, v39 ; 10164EFF 3F933333 > v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 > v_cndmask_b32_e32 v11, v15, v11 ; 0016170F > v_mul_f32_e32 v14, 0.5, v11 ; 101C16F0 > v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 > v_cndmask_b32_e32 v14, v11, v14 ; 001C1D0B > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v11, v11, v14 ; 00161D0B > v_sub_f32_e64 v12, |v12|, v11 ; D208010C 0002170C > v_cmp_eq_f32_e32 vcc, 0, v11 ; 7C041680 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 > v_cndmask_b32_e64 v14, v12, 1.0, vcc ; D200000E 01A9E50C > v_cmp_le_f32_e32 vcc, 0, v14 ; 7C061C80 > v_mul_f32_e32 v14, 0x70000000, v14 ; 101C1CFF 70000000 > v_bfrev_b32_e32 v15, 15 ; 7E1E708F > v_cndmask_b32_e32 v14, v15, v14 ; 001C1D0F > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v11, v11 ; 7E16550B > v_mul_f32_e32 v14, v11, v12 ; 101C190B > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v15, 0xbe9eb852 ; 7E1E02FF BE9EB852 > v_mul_f32_e32 v24, v15, v18 ; 1030250F > v_mac_f32_e32 v24, 0x3c23d70a, v17 ; 3E3022FF 3C23D70A > v_mul_f32_e32 v23, v15, v17 ; 102E230F > v_mov_b32_e32 v16, 0xbc23d70a ; 7E2002FF BC23D70A > v_mac_f32_e32 v23, v18, v16 ; 3E2E2112 > v_fma_f32 v16, v43, v24, v40 ; D2960010 04A2312B > v_fma_f32 v15, v44, v24, v41 ; D296000F 04A6312C > v_mul_f32_e32 v12, 0x404ccccd, v39 ; 10184EFF 404CCCCD > v_fma_f32 v16, v7, v23, v16 ; D2960010 04422F07 > v_fma_f32 v19, v42, v24, v33 ; D2960013 0486312A > v_fma_f32 v15, v6, v23, v15 ; D296000F 043E2F06 > v_fma_f32 v16, v16, v12, v9 ; D2960010 04261910 > v_fma_f32 v19, v5, v23, v19 ; D2960013 044E2F05 > v_fma_f32 v15, v15, v12, v8 ; D296000F 0422190F > v_mul_f32_e32 v20, s20, v16 ; 10282014 > v_mul_f32_e32 v26, s9, v16 ; 10342009 > v_mul_f32_e32 v16, s22, v16 ; 10202016 > v_fma_f32 v19, v19, v12, v0 ; D2960013 04021913 > v_mac_f32_e32 v20, s7, v15 ; 3E281E07 > v_mac_f32_e32 v16, s11, v15 ; 3E201E0B > v_mac_f32_e32 v20, s2, v19 ; 3E282602 > v_mac_f32_e32 v26, s0, v15 ; 3E341E00 > v_mac_f32_e32 v16, s8, v19 ; 3E202608 > v_mac_f32_e32 v26, s1, v19 ; 3E342601 > v_add_f32_e32 v20, s3, v20 ; 06282803 > v_add_f32_e32 v15, s6, v16 ; 061E2006 > v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 > v_add_f32_e32 v26, s21, v26 ; 06343415 > v_rcp_f32_e32 v30, v15 ; 7E3C550F > v_cndmask_b32_e64 v16, v20, 1.0, vcc ; D2000010 01A9E514 > v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480 > v_bfrev_b32_e32 v28, 14 ; 7E38708E > 
v_cndmask_b32_e64 v27, v26, 1.0, vcc ; D200001B 01A9E51A > v_cmp_le_f32_e32 vcc, 0, v16 ; 7C062080 > v_mul_f32_e32 v16, v28, v16 ; 1020211C > v_bfrev_b32_e32 v29, 15 ; 7E3A708F > v_cndmask_b32_e32 v16, v29, v16 ; 0020211D > v_cmp_le_f32_e32 vcc, 0, v27 ; 7C063680 > v_mul_f32_e32 v27, v28, v27 ; 1036371C > v_cndmask_b32_e32 v27, v29, v27 ; 0036371D > v_cmp_eq_f32_e32 vcc, 0, v15 ; 7C041E80 > v_mul_f32_e32 v15, v30, v20 ; 101E291E > v_mul_f32_e32 v20, v30, v26 ; 1028351E > v_cndmask_b32_e32 v15, v15, v16 ; 001E210F > v_cndmask_b32_e32 v16, v20, v27 ; 00203714 > v_fma_f32 v26, v15, 0.5, 0.5 ; D296001A 03C1E10F > v_fma_f32 v27, v16, -0.5, 0.5 ; D296001B 03C1E310 > image_sample v15, v[26:27], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C30F1A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v15, v15, v19 ; 0A1E270F > v_add_f32_e64 v11, 0, v14 clamp ; D206080B 00021C80 > v_mul_f32_e32 v14, 0x3fcccccd, v39 ; 101C4EFF 3FCCCCCD > v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 > v_cndmask_b32_e32 v12, v12, v14 ; 00181D0C > v_mul_f32_e32 v14, 0.5, v12 ; 101C18F0 > v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 > v_cndmask_b32_e32 v14, v12, v14 ; 001C1D0C > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v12, v12, v14 ; 00181D0C > v_sub_f32_e64 v14, |v15|, v12 ; D208010E 0002190F > v_cmp_eq_f32_e32 vcc, 0, v12 ; 7C041880 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80 > v_cndmask_b32_e64 v15, v14, 1.0, vcc ; D200000F 01A9E50E > v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 > v_mul_f32_e32 v15, 0x70000000, v15 ; 101E1EFF 70000000 > v_bfrev_b32_e32 v16, 15 ; 7E20708F > v_cndmask_b32_e32 v15, v16, v15 ; 001E1F10 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v12, v12 ; 7E18550C > v_mul_f32_e32 v15, v12, v14 ; 101E1D0C > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v14, 0xbec28f5c ; 7E1C02FF BEC28F5C > v_mul_f32_e32 v26, v14, v17 ; 1034230E > v_mul_f32_e32 v27, v14, v18 ; 1036250E > v_mov_b32_e32 v14, 0xbd4ccccd ; 7E1C02FF BD4CCCCD > v_mac_f32_e32 v27, v17, v14 ; 3E361D11 > v_add_f32_e64 v12, 0, v15 clamp ; D206080C 00021E80 > v_mac_f32_e32 v26, 0x3d4ccccd, v18 ; 3E3424FF 3D4CCCCD > v_fma_f32 v15, v43, v27, v40 ; D296000F 04A2372B > v_fma_f32 v14, v44, v27, v41 ; D296000E 04A6372C > v_fma_f32 v15, v7, v26, v15 ; D296000F 043E3507 > v_mul_f32_e32 v19, 0x40833333, v39 ; 10264EFF 40833333 > v_fma_f32 v16, v42, v27, v33 ; D2960010 0486372A > v_fma_f32 v14, v6, v26, v14 ; D296000E 043A3506 > v_fma_f32 v15, v15, v19, v9 ; D296000F 0426270F > v_fma_f32 v16, v5, v26, v16 ; D2960010 04423505 > v_fma_f32 v14, v14, v19, v8 ; D296000E 0422270E > v_mul_f32_e32 v20, s20, v15 ; 10281E14 > v_mul_f32_e32 v28, s9, v15 ; 10381E09 > v_mul_f32_e32 v15, s22, v15 ; 101E1E16 > v_fma_f32 v16, v16, v19, v0 ; D2960010 04022710 > v_mac_f32_e32 v20, s7, v14 ; 3E281C07 > v_mac_f32_e32 v15, s11, v14 ; 3E1E1C0B > v_mac_f32_e32 v20, s2, v16 ; 3E282002 > v_mac_f32_e32 v28, s0, v14 ; 3E381C00 > v_mac_f32_e32 v15, s8, v16 ; 3E1E2008 > v_mac_f32_e32 v28, s1, v16 ; 3E382001 > v_add_f32_e32 v20, s3, v20 ; 06282803 > v_add_f32_e32 v14, s6, v15 ; 061C1E06 > v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 > v_add_f32_e32 v28, s21, v28 ; 06383815 > v_rcp_f32_e32 v32, v14 ; 7E40550E > v_cndmask_b32_e64 v15, v20, 1.0, vcc ; D200000F 01A9E514 > v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 > v_bfrev_b32_e32 v30, 14 ; 7E3C708E > v_cndmask_b32_e64 v29, v28, 1.0, vcc ; D200001D 01A9E51C > v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 
> v_mul_f32_e32 v15, v30, v15 ; 101E1F1E > v_bfrev_b32_e32 v31, 15 ; 7E3E708F > v_cndmask_b32_e32 v15, v31, v15 ; 001E1F1F > v_cmp_le_f32_e32 vcc, 0, v29 ; 7C063A80 > v_mul_f32_e32 v29, v30, v29 ; 103A3B1E > v_cndmask_b32_e32 v29, v31, v29 ; 003A3B1F > v_cmp_eq_f32_e32 vcc, 0, v14 ; 7C041C80 > v_mul_f32_e32 v14, v32, v20 ; 101C2920 > v_mul_f32_e32 v20, v32, v28 ; 10283920 > v_cndmask_b32_e32 v14, v14, v15 ; 001C1F0E > v_cndmask_b32_e32 v15, v20, v29 ; 001E3B14 > v_fma_f32 v28, v14, 0.5, 0.5 ; D296001C 03C1E10E > v_fma_f32 v29, v15, -0.5, 0.5 ; D296001D 03C1E30F > image_sample v14, v[28:29], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C30E1C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v15, v14, v16 ; 0A1E210E > v_mul_f32_e32 v14, 0x40033333, v39 ; 101C4EFF 40033333 > v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 > v_cndmask_b32_e32 v14, v19, v14 ; 001C1D13 > v_mul_f32_e32 v16, 0.5, v14 ; 10201CF0 > v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 > v_cndmask_b32_e32 v16, v14, v16 ; 0020210E > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v14, v14, v16 ; 001C210E > v_sub_f32_e64 v15, |v15|, v14 ; D208010F 00021D0F > v_cmp_eq_f32_e32 vcc, 0, v14 ; 7C041C80 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 > v_cndmask_b32_e64 v16, v15, 1.0, vcc ; D2000010 01A9E50F > v_cmp_le_f32_e32 vcc, 0, v16 ; 7C062080 > v_mul_f32_e32 v16, 0x70000000, v16 ; 102020FF 70000000 > v_bfrev_b32_e32 v19, 15 ; 7E26708F > v_cndmask_b32_e32 v16, v19, v16 ; 00202113 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v14, v14 ; 7E1C550E > v_mul_f32_e32 v16, v14, v15 ; 10201F0E > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mul_f32_e32 v29, 0.5, v18 ; 103A24F0 > v_mad_f32 v28, 0.5, v17, v29 ; D282001C 047622F0 > v_mac_f32_e32 v29, -0.5, v17 ; 3E3A22F1 > v_fma_f32 v20, v43, v29, v40 ; D2960014 04A23B2B > v_fma_f32 v19, v44, v29, v41 ; D2960013 04A63B2C > v_mul_f32_e32 v15, 0x40a9999a, v39 ; 101E4EFF 40A9999A > v_fma_f32 v20, v7, v28, v20 ; D2960014 04523907 > v_fma_f32 v30, v42, v29, v33 ; D296001E 04863B2A > v_fma_f32 v19, v6, v28, v19 ; D2960013 044E3906 > v_fma_f32 v20, v20, v15, v9 ; D2960014 04261F14 > v_fma_f32 v30, v5, v28, v30 ; D296001E 047A3905 > v_fma_f32 v19, v19, v15, v8 ; D2960013 04221F13 > v_mul_f32_e32 v31, s20, v20 ; 103E2814 > v_mul_f32_e32 v32, s9, v20 ; 10402809 > v_mul_f32_e32 v20, s22, v20 ; 10282816 > v_fma_f32 v30, v30, v15, v0 ; D296001E 04021F1E > v_mac_f32_e32 v31, s7, v19 ; 3E3E2607 > v_mac_f32_e32 v20, s11, v19 ; 3E28260B > v_mac_f32_e32 v31, s2, v30 ; 3E3E3C02 > v_mac_f32_e32 v32, s0, v19 ; 3E402600 > v_mac_f32_e32 v20, s8, v30 ; 3E283C08 > v_mac_f32_e32 v32, s1, v30 ; 3E403C01 > v_add_f32_e32 v31, s3, v31 ; 063E3E03 > v_add_f32_e32 v19, s6, v20 ; 06262806 > v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80 > v_add_f32_e32 v32, s21, v32 ; 06404015 > v_rcp_f32_e32 v37, v19 ; 7E4A5513 > v_cndmask_b32_e64 v20, v31, 1.0, vcc ; D2000014 01A9E51F > v_cmp_lt_f32_e32 vcc, 0, v32 ; 7C024080 > v_bfrev_b32_e32 v35, 14 ; 7E46708E > v_cndmask_b32_e64 v34, v32, 1.0, vcc ; D2000022 01A9E520 > v_cmp_le_f32_e32 vcc, 0, v20 ; 7C062880 > v_mul_f32_e32 v20, v35, v20 ; 10282923 > v_bfrev_b32_e32 v36, 15 ; 7E48708F > v_cndmask_b32_e32 v20, v36, v20 ; 00282924 > v_cmp_le_f32_e32 vcc, 0, v34 ; 7C064480 > v_mul_f32_e32 v34, v35, v34 ; 10444523 > v_cndmask_b32_e32 v34, v36, v34 ; 00444524 > v_cmp_eq_f32_e32 vcc, 0, v19 ; 7C042680 > v_mul_f32_e32 v19, v37, v31 ; 10263F25 > v_mul_f32_e32 v31, 
v37, v32 ; 103E4125 > v_cndmask_b32_e32 v19, v19, v20 ; 00262913 > v_cndmask_b32_e32 v20, v31, v34 ; 0028451F > v_fma_f32 v31, v19, 0.5, 0.5 ; D296001F 03C1E113 > v_fma_f32 v32, v20, -0.5, 0.5 ; D2960020 03C1E314 > image_sample v19, v[31:32], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3131F > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v19, v19, v30 ; 0A263D13 > v_add_f32_e64 v14, 0, v16 clamp ; D206080E 00022080 > v_mul_f32_e32 v16, 0x4029999a, v39 ; 10204EFF 4029999A > v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 > v_cndmask_b32_e32 v15, v15, v16 ; 001E210F > v_mul_f32_e32 v16, 0.5, v15 ; 10201EF0 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_cndmask_b32_e32 v16, v15, v16 ; 0020210F > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v15, v15, v16 ; 001E210F > v_sub_f32_e64 v16, |v19|, v15 ; D2080110 00021F13 > v_cmp_eq_f32_e32 vcc, 0, v15 ; 7C041E80 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 > v_cndmask_b32_e64 v19, v16, 1.0, vcc ; D2000013 01A9E510 > v_cmp_le_f32_e32 vcc, 0, v19 ; 7C062680 > v_mul_f32_e32 v19, 0x70000000, v19 ; 102626FF 70000000 > v_bfrev_b32_e32 v20, 15 ; 7E28708F > v_cndmask_b32_e32 v19, v20, v19 ; 00262714 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v15, v15 ; 7E1E550F > v_mul_f32_e32 v19, v15, v16 ; 1026210F > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v16, 0x3eb33333 ; 7E2002FF 3EB33333 > v_mul_f32_e32 v31, v16, v18 ; 103E2510 > v_add_f32_e64 v15, 0, v19 clamp ; D206080F 00022680 > v_mac_f32_e32 v31, 0x3ecccccd, v17 ; 3E3E22FF 3ECCCCCD > v_mul_f32_e32 v30, v16, v17 ; 103C2310 > v_mov_b32_e32 v19, 0xbecccccd ; 7E2602FF BECCCCCD > v_mac_f32_e32 v30, v18, v19 ; 3E3C2712 > v_fma_f32 v19, v43, v31, v40 ; D2960013 04A23F2B > v_fma_f32 v16, v44, v31, v41 ; D2960010 04A63F2C > v_fma_f32 v19, v7, v30, v19 ; D2960013 044E3D07 > v_mul_f32_e32 v32, 0x40c33333, v39 ; 10404EFF 40C33333 > v_fma_f32 v20, v42, v31, v33 ; D2960014 04863F2A > v_fma_f32 v16, v6, v30, v16 ; D2960010 04423D06 > v_fma_f32 v19, v19, v32, v9 ; D2960013 04264113 > v_fma_f32 v20, v5, v30, v20 ; D2960014 04523D05 > v_fma_f32 v16, v16, v32, v8 ; D2960010 04224110 > v_mul_f32_e32 v34, s20, v19 ; 10442614 > v_mul_f32_e32 v35, s9, v19 ; 10462609 > v_mul_f32_e32 v19, s22, v19 ; 10262616 > v_fma_f32 v20, v20, v32, v0 ; D2960014 04024114 > v_mac_f32_e32 v34, s7, v16 ; 3E442007 > v_mac_f32_e32 v19, s11, v16 ; 3E26200B > v_mac_f32_e32 v34, s2, v20 ; 3E442802 > v_mac_f32_e32 v35, s0, v16 ; 3E462000 > v_mac_f32_e32 v19, s8, v20 ; 3E262808 > v_mac_f32_e32 v35, s1, v20 ; 3E462801 > v_add_f32_e32 v34, s3, v34 ; 06444403 > v_add_f32_e32 v16, s6, v19 ; 06202606 > v_cmp_lt_f32_e32 vcc, 0, v34 ; 7C024480 > v_add_f32_e32 v35, s21, v35 ; 06464615 > v_rcp_f32_e32 v45, v16 ; 7E5A5510 > v_cndmask_b32_e64 v19, v34, 1.0, vcc ; D2000013 01A9E522 > v_cmp_lt_f32_e32 vcc, 0, v35 ; 7C024680 > v_bfrev_b32_e32 v37, 14 ; 7E4A708E > v_cndmask_b32_e64 v36, v35, 1.0, vcc ; D2000024 01A9E523 > v_cmp_le_f32_e32 vcc, 0, v19 ; 7C062680 > v_mul_f32_e32 v19, v37, v19 ; 10262725 > v_bfrev_b32_e32 v38, 15 ; 7E4C708F > v_cndmask_b32_e32 v19, v38, v19 ; 00262726 > v_cmp_le_f32_e32 vcc, 0, v36 ; 7C064880 > v_mul_f32_e32 v36, v37, v36 ; 10484925 > v_cndmask_b32_e32 v36, v38, v36 ; 00484926 > v_cmp_eq_f32_e32 vcc, 0, v16 ; 7C042080 > v_mul_f32_e32 v16, v45, v34 ; 1020452D > v_mul_f32_e32 v34, v45, v35 ; 1044472D > v_cndmask_b32_e32 v16, v16, v19 ; 00202710 > v_cndmask_b32_e32 v19, v34, v36 
; 00264922 > v_fma_f32 v34, v16, 0.5, 0.5 ; D2960022 03C1E110 > v_fma_f32 v35, v19, -0.5, 0.5 ; D2960023 03C1E313 > image_sample v16, v[34:35], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C31022 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v19, v16, v20 ; 0A262910 > v_mul_f32_e32 v16, 0x40433333, v39 ; 10204EFF 40433333 > v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 > v_cndmask_b32_e32 v16, v32, v16 ; 00202120 > v_mul_f32_e32 v20, 0.5, v16 ; 102820F0 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_cndmask_b32_e32 v20, v16, v20 ; 00282910 > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v16, v16, v20 ; 00202910 > v_sub_f32_e64 v19, |v19|, v16 ; D2080113 00022113 > v_cmp_eq_f32_e32 vcc, 0, v16 ; 7C042080 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_cndmask_b32_e64 v20, v19, 1.0, vcc ; D2000014 01A9E513 > v_cmp_le_f32_e32 vcc, 0, v20 ; 7C062880 > v_mul_f32_e32 v20, 0x70000000, v20 ; 102828FF 70000000 > v_bfrev_b32_e32 v32, 15 ; 7E40708F > v_cndmask_b32_e32 v20, v32, v20 ; 00282920 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v16, v16 ; 7E205510 > v_mul_f32_e32 v20, v16, v19 ; 10282710 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mul_f32_e32 v32, -0.5, v17 ; 104022F1 > v_mov_b32_e32 v34, 0xbec28f5c ; 7E4402FF BEC28F5C > v_mac_f32_e32 v32, v18, v34 ; 3E404512 > v_mul_f32_e32 v34, -0.5, v18 ; 104424F1 > v_mac_f32_e32 v34, 0x3ec28f5c, v17 ; 3E4422FF 3EC28F5C > v_fma_f32 v36, v43, v34, v40 ; D2960024 04A2452B > v_fma_f32 v35, v44, v34, v41 ; D2960023 04A6452C > v_mul_f32_e32 v19, 0x40f66666, v39 ; 10264EFF 40F66666 > v_fma_f32 v36, v7, v32, v36 ; D2960024 04924107 > v_fma_f32 v37, v42, v34, v33 ; D2960025 0486452A > v_fma_f32 v35, v6, v32, v35 ; D2960023 048E4106 > v_fma_f32 v36, v36, v19, v9 ; D2960024 04262724 > v_fma_f32 v37, v5, v32, v37 ; D2960025 04964105 > v_fma_f32 v35, v35, v19, v8 ; D2960023 04222723 > v_mul_f32_e32 v38, s20, v36 ; 104C4814 > v_mul_f32_e32 v45, s9, v36 ; 105A4809 > v_mul_f32_e32 v36, s22, v36 ; 10484816 > v_fma_f32 v37, v37, v19, v0 ; D2960025 04022725 > v_mac_f32_e32 v38, s7, v35 ; 3E4C4607 > v_mac_f32_e32 v36, s11, v35 ; 3E48460B > v_mac_f32_e32 v38, s2, v37 ; 3E4C4A02 > v_mac_f32_e32 v45, s0, v35 ; 3E5A4600 > v_mac_f32_e32 v36, s8, v37 ; 3E484A08 > v_mac_f32_e32 v45, s1, v37 ; 3E5A4A01 > v_add_f32_e32 v38, s3, v38 ; 064C4C03 > v_add_f32_e32 v35, s6, v36 ; 06464806 > v_cmp_lt_f32_e32 vcc, 0, v38 ; 7C024C80 > v_add_f32_e32 v45, s21, v45 ; 065A5A15 > v_rcp_f32_e32 v49, v35 ; 7E625523 > v_cndmask_b32_e64 v36, v38, 1.0, vcc ; D2000024 01A9E526 > v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 > v_bfrev_b32_e32 v47, 14 ; 7E5E708E > v_cndmask_b32_e64 v46, v45, 1.0, vcc ; D200002E 01A9E52D > v_cmp_le_f32_e32 vcc, 0, v36 ; 7C064880 > v_mul_f32_e32 v36, v47, v36 ; 1048492F > v_bfrev_b32_e32 v48, 15 ; 7E60708F > v_cndmask_b32_e32 v36, v48, v36 ; 00484930 > v_cmp_le_f32_e32 vcc, 0, v46 ; 7C065C80 > v_mul_f32_e32 v46, v47, v46 ; 105C5D2F > v_cndmask_b32_e32 v46, v48, v46 ; 005C5D30 > v_cmp_eq_f32_e32 vcc, 0, v35 ; 7C044680 > v_mul_f32_e32 v35, v49, v38 ; 10464D31 > v_mul_f32_e32 v38, v49, v45 ; 104C5B31 > v_cndmask_b32_e32 v35, v35, v36 ; 00464923 > v_cndmask_b32_e32 v36, v38, v46 ; 00485D26 > v_fma_f32 v45, v35, 0.5, 0.5 ; D296002D 03C1E123 > v_fma_f32 v46, v36, -0.5, 0.5 ; D296002E 03C1E324 > image_sample v35, v[45:46], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3232D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v35, v35, v37 
; 0A464B23 > v_add_f32_e64 v16, 0, v20 clamp ; D2060810 00022880 > v_mul_f32_e32 v20, 0x40766666, v39 ; 10284EFF 40766666 > v_cmp_gt_f32_e32 vcc, 0, v35 ; 7C084680 > v_cndmask_b32_e32 v19, v19, v20 ; 00262913 > v_mul_f32_e32 v20, 0.5, v19 ; 102826F0 > v_cmp_lt_f32_e32 vcc, 0, v35 ; 7C024680 > v_cndmask_b32_e32 v20, v19, v20 ; 00282913 > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v19, v19, v20 ; 00262913 > v_sub_f32_e64 v20, |v35|, v19 ; D2080114 00022723 > v_cmp_eq_f32_e32 vcc, 0, v19 ; 7C042680 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 > v_cndmask_b32_e64 v35, v20, 1.0, vcc ; D2000023 01A9E514 > v_cmp_le_f32_e32 vcc, 0, v35 ; 7C064680 > v_mul_f32_e32 v35, 0x70000000, v35 ; 104646FF 70000000 > v_bfrev_b32_e32 v36, 15 ; 7E48708F > v_cndmask_b32_e32 v35, v36, v35 ; 00464724 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v19, v19 ; 7E265513 > v_mul_f32_e32 v35, v19, v20 ; 10462913 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mov_b32_e32 v20, 0x3eae147b ; 7E2802FF 3EAE147B > v_add_f32_e64 v19, 0, v35 clamp ; D2060813 00024680 > v_mul_f32_e32 v35, v20, v17 ; 10462314 > v_mul_f32_e32 v37, v20, v18 ; 104A2514 > v_mov_b32_e32 v20, 0xbf35c28f ; 7E2802FF BF35C28F > v_mac_f32_e32 v37, v17, v20 ; 3E4A2911 > v_mac_f32_e32 v35, 0x3f35c28f, v18 ; 3E4624FF 3F35C28F > v_fma_f32 v36, v43, v37, v40 ; D2960024 04A24B2B > v_fma_f32 v20, v44, v37, v41 ; D2960014 04A64B2C > v_fma_f32 v36, v7, v35, v36 ; D2960024 04924707 > v_mul_f32_e32 v45, 0x41080000, v39 ; 105A4EFF 41080000 > v_fma_f32 v38, v42, v37, v33 ; D2960026 04864B2A > v_fma_f32 v20, v6, v35, v20 ; D2960014 04524706 > v_fma_f32 v36, v36, v45, v9 ; D2960024 04265B24 > v_fma_f32 v38, v5, v35, v38 ; D2960026 049A4705 > v_fma_f32 v20, v20, v45, v8 ; D2960014 04225B14 > v_mul_f32_e32 v46, s20, v36 ; 105C4814 > v_mul_f32_e32 v47, s9, v36 ; 105E4809 > v_mul_f32_e32 v36, s22, v36 ; 10484816 > v_fma_f32 v38, v38, v45, v0 ; D2960026 04025B26 > v_mac_f32_e32 v46, s7, v20 ; 3E5C2807 > v_mac_f32_e32 v36, s11, v20 ; 3E48280B > v_mac_f32_e32 v46, s2, v38 ; 3E5C4C02 > v_mac_f32_e32 v47, s0, v20 ; 3E5E2800 > v_mac_f32_e32 v36, s8, v38 ; 3E484C08 > v_mac_f32_e32 v47, s1, v38 ; 3E5E4C01 > v_add_f32_e32 v46, s3, v46 ; 065C5C03 > v_add_f32_e32 v20, s6, v36 ; 06284806 > v_cmp_lt_f32_e32 vcc, 0, v46 ; 7C025C80 > v_add_f32_e32 v47, s21, v47 ; 065E5E15 > v_rcp_f32_e32 v51, v20 ; 7E665514 > v_cndmask_b32_e64 v36, v46, 1.0, vcc ; D2000024 01A9E52E > v_cmp_lt_f32_e32 vcc, 0, v47 ; 7C025E80 > v_bfrev_b32_e32 v49, 14 ; 7E62708E > v_cndmask_b32_e64 v48, v47, 1.0, vcc ; D2000030 01A9E52F > v_cmp_le_f32_e32 vcc, 0, v36 ; 7C064880 > v_mul_f32_e32 v36, v49, v36 ; 10484931 > v_bfrev_b32_e32 v50, 15 ; 7E64708F > v_cndmask_b32_e32 v36, v50, v36 ; 00484932 > v_cmp_le_f32_e32 vcc, 0, v48 ; 7C066080 > v_mul_f32_e32 v48, v49, v48 ; 10606131 > v_cndmask_b32_e32 v48, v50, v48 ; 00606132 > v_cmp_eq_f32_e32 vcc, 0, v20 ; 7C042880 > v_mul_f32_e32 v20, v51, v46 ; 10285D33 > v_mul_f32_e32 v46, v51, v47 ; 105C5F33 > v_cndmask_b32_e32 v20, v20, v36 ; 00284914 > v_cndmask_b32_e32 v36, v46, v48 ; 0048612E > v_fma_f32 v46, v20, 0.5, 0.5 ; D296002E 03C1E114 > v_fma_f32 v47, v36, -0.5, 0.5 ; D296002F 03C1E324 > image_sample v20, v[46:47], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3142E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v36, v20, v38 ; 0A484D14 > v_mul_f32_e32 v20, 0x40880000, v39 ; 10284EFF 40880000 > v_cmp_gt_f32_e32 vcc, 0, v36 
; 7C084880 > v_cndmask_b32_e32 v20, v45, v20 ; 0028292D > v_cmp_lt_f32_e32 vcc, 0, v36 ; 7C024880 > v_mul_f32_e32 v38, 0.5, v20 ; 104C28F0 > v_cndmask_b32_e32 v38, v20, v38 ; 004C4D14 > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v20, v20, v38 ; 00284D14 > v_sub_f32_e64 v36, |v36|, v20 ; D2080124 00022924 > v_cmp_eq_f32_e32 vcc, 0, v20 ; 7C042880 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v36 ; 7C024880 > v_cndmask_b32_e64 v38, v36, 1.0, vcc ; D2000026 01A9E524 > v_cmp_le_f32_e32 vcc, 0, v38 ; 7C064C80 > v_mul_f32_e32 v38, 0x70000000, v38 ; 104C4CFF 70000000 > v_bfrev_b32_e32 v45, 15 ; 7E5A708F > v_cndmask_b32_e32 v38, v45, v38 ; 004C4D2D > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v20, v20 ; 7E285514 > v_mul_f32_e32 v38, v20, v36 ; 104C4914 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_add_f32_e64 v20, 0, v38 clamp ; D2060814 00024C80 > v_mov_b32_e32 v38, 0xbe19999a ; 7E4C02FF BE19999A > v_mul_f32_e32 v36, v38, v17 ; 10482326 > v_mul_f32_e32 v38, v38, v18 ; 104C2526 > v_mov_b32_e32 v46, 0xbdcccccd ; 7E5C02FF BDCCCCCD > v_mac_f32_e32 v38, v17, v46 ; 3E4C5D11 > v_mac_f32_e32 v36, 0x3dcccccd, v18 ; 3E4824FF 3DCCCCCD > v_fma_f32 v40, v43, v38, v40 ; D2960028 04A24D2B > v_fma_f32 v41, v44, v38, v41 ; D2960029 04A64D2C > v_mul_f32_e32 v45, 0x41200000, v39 ; 105A4EFF 41200000 > v_fma_f32 v40, v7, v36, v40 ; D2960028 04A24907 > v_fma_f32 v42, v42, v38, v33 ; D296002A 04864D2A > v_fma_f32 v41, v6, v36, v41 ; D2960029 04A64906 > v_fma_f32 v40, v40, v45, v9 ; D2960028 04265B28 > v_fma_f32 v42, v5, v36, v42 ; D296002A 04AA4905 > v_fma_f32 v41, v41, v45, v8 ; D2960029 04225B29 > v_mul_f32_e32 v43, s20, v40 ; 10565014 > v_mul_f32_e32 v44, s9, v40 ; 10585009 > v_mul_f32_e32 v40, s22, v40 ; 10505016 > v_fma_f32 v42, v42, v45, v0 ; D296002A 04025B2A > v_mac_f32_e32 v43, s7, v41 ; 3E565207 > v_mac_f32_e32 v40, s11, v41 ; 3E50520B > v_mac_f32_e32 v43, s2, v42 ; 3E565402 > v_mac_f32_e32 v44, s0, v41 ; 3E585200 > v_mac_f32_e32 v40, s8, v42 ; 3E505408 > v_mac_f32_e32 v44, s1, v42 ; 3E585401 > v_add_f32_e32 v43, s3, v43 ; 06565603 > v_add_f32_e32 v40, s6, v40 ; 06505006 > v_cmp_lt_f32_e32 vcc, 0, v43 ; 7C025680 > v_add_f32_e32 v44, s21, v44 ; 06585815 > v_rcp_f32_e32 v49, v40 ; 7E625528 > v_cndmask_b32_e64 v41, v43, 1.0, vcc ; D2000029 01A9E52B > v_cmp_lt_f32_e32 vcc, 0, v44 ; 7C025880 > v_bfrev_b32_e32 v47, 14 ; 7E5E708E > v_cndmask_b32_e64 v46, v44, 1.0, vcc ; D200002E 01A9E52C > v_cmp_le_f32_e32 vcc, 0, v41 ; 7C065280 > v_mul_f32_e32 v41, v47, v41 ; 1052532F > v_bfrev_b32_e32 v48, 15 ; 7E60708F > v_cndmask_b32_e32 v41, v48, v41 ; 00525330 > v_mul_f32_e32 v47, v47, v46 ; 105E5D2F > v_cmp_le_f32_e32 vcc, 0, v46 ; 7C065C80 > v_cndmask_b32_e32 v46, v48, v47 ; 005C5F30 > v_mul_f32_e32 v43, v49, v43 ; 10565731 > v_cmp_eq_f32_e32 vcc, 0, v40 ; 7C045080 > v_mul_f32_e32 v44, v49, v44 ; 10585931 > v_cndmask_b32_e32 v40, v43, v41 ; 0050532B > v_cndmask_b32_e32 v41, v44, v46 ; 00525D2C > v_fma_f32 v43, v40, 0.5, 0.5 ; D296002B 03C1E128 > v_fma_f32 v44, v41, -0.5, 0.5 ; D296002C 03C1E329 > image_sample v40, v[43:44], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3282B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v40, v40, v42 ; 0A505528 > v_mul_f32_e32 v39, 0x40a00000, v39 ; 104E4EFF 40A00000 > v_cmp_gt_f32_e32 vcc, 0, v40 ; 7C085080 > v_cndmask_b32_e32 v39, v45, v39 ; 004E4F2D > v_cmp_lt_f32_e32 vcc, 0, v40 ; 7C025080 > v_mul_f32_e32 v41, 0.5, v39 ; 10524EF0 > 
v_cndmask_b32_e32 v41, v39, v41 ; 00525327 > v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 > v_cndmask_b32_e32 v33, v39, v41 ; 00425327 > v_sub_f32_e64 v39, |v40|, v33 ; D2080127 00024328 > v_cmp_eq_f32_e32 vcc, 0, v33 ; 7C044280 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v39 ; 7C024E80 > v_cndmask_b32_e64 v40, v39, 1.0, vcc ; D2000028 01A9E527 > v_cmp_le_f32_e32 vcc, 0, v40 ; 7C065080 > v_mul_f32_e32 v40, 0x70000000, v40 ; 105050FF 70000000 > v_bfrev_b32_e32 v41, 15 ; 7E52708F > v_cndmask_b32_e32 v40, v41, v40 ; 00505129 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v33, v33 ; 7E425521 > v_mul_f32_e32 v40, v33, v39 ; 10504F21 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_add_f32_e64 v33, 0, v40 clamp ; D2060821 00025080 > v_mul_f32_e32 v43, 0x3dcccccd, v18 ; 105624FF 3DCCCCCD > v_mov_b32_e32 v40, 0xbdcccccd ; 7E5002FF BDCCCCCD > v_mul_f32_e32 v42, v5, v2 ; 10540505 > v_mac_f32_e32 v43, v17, v40 ; 3E565111 > v_mul_f32_e32 v40, v7, v1 ; 10500307 > v_mul_f32_e32 v44, v6, v4 ; 10580906 > v_mul_f32_e32 v39, 0x3c23d70a, v0 ; 104E00FF 3C23D70A > v_fma_f32 v41, v4, v5, -v40 ; D2960029 84A20B04 > v_fma_f32 v42, v1, v6, -v42 ; D296002A 84AA0D01 > v_fma_f32 v40, v2, v7, -v44 ; D2960028 84B20F02 > v_fma_f32 v44, v41, v43, v2 ; D296002C 040A5729 > v_fma_f32 v45, v42, v43, v4 ; D296002D 0412572A > v_max_f32_e32 v39, 1.0, v39 ; 204E4EF2 > v_fma_f32 v43, v40, v43, v1 ; D296002B 04065728 > v_fma_f32 v44, v6, v25, v44 ; D296002C 04B23306 > v_fma_f32 v45, v7, v25, v45 ; D296002D 04B63307 > v_fma_f32 v25, v5, v25, v43 ; D2960019 04AE3305 > v_mul_f32_e32 v43, 0.5, v39 ; 10564EF0 > v_fma_f32 v45, v45, v43, v9 ; D296002D 0426572D > v_fma_f32 v44, v44, v43, v8 ; D296002C 0422572C > v_mul_f32_e32 v46, s20, v45 ; 105C5A14 > v_mul_f32_e32 v47, s9, v45 ; 105E5A09 > v_mul_f32_e32 v45, s22, v45 ; 105A5A16 > v_fma_f32 v25, v25, v43, v0 ; D2960019 04025719 > v_mac_f32_e32 v46, s7, v44 ; 3E5C5807 > v_mac_f32_e32 v45, s11, v44 ; 3E5A580B > v_mac_f32_e32 v46, s2, v25 ; 3E5C3202 > v_mac_f32_e32 v47, s0, v44 ; 3E5E5800 > v_mac_f32_e32 v45, s8, v25 ; 3E5A3208 > v_mac_f32_e32 v47, s1, v25 ; 3E5E3201 > v_add_f32_e32 v46, s3, v46 ; 065C5C03 > v_add_f32_e32 v44, s6, v45 ; 06585A06 > v_cmp_lt_f32_e32 vcc, 0, v46 ; 7C025C80 > v_add_f32_e32 v47, s21, v47 ; 065E5E15 > v_rcp_f32_e32 v51, v44 ; 7E66552C > v_cndmask_b32_e64 v45, v46, 1.0, vcc ; D200002D 01A9E52E > v_cmp_lt_f32_e32 vcc, 0, v47 ; 7C025E80 > v_bfrev_b32_e32 v49, 14 ; 7E62708E > v_cndmask_b32_e64 v48, v47, 1.0, vcc ; D2000030 01A9E52F > v_cmp_le_f32_e32 vcc, 0, v45 ; 7C065A80 > v_mul_f32_e32 v45, v49, v45 ; 105A5B31 > v_bfrev_b32_e32 v50, 15 ; 7E64708F > v_cndmask_b32_e32 v45, v50, v45 ; 005A5B32 > v_cmp_le_f32_e32 vcc, 0, v48 ; 7C066080 > v_mul_f32_e32 v48, v49, v48 ; 10606131 > v_cndmask_b32_e32 v48, v50, v48 ; 00606132 > v_cmp_eq_f32_e32 vcc, 0, v44 ; 7C045880 > v_mul_f32_e32 v44, v51, v46 ; 10585D33 > v_mul_f32_e32 v46, v51, v47 ; 105C5F33 > v_cndmask_b32_e32 v44, v44, v45 ; 00585B2C > v_cndmask_b32_e32 v45, v46, v48 ; 005A612E > v_fma_f32 v46, v44, 0.5, 0.5 ; D296002E 03C1E12C > v_fma_f32 v47, v45, -0.5, 0.5 ; D296002F 03C1E32D > image_sample v44, v[46:47], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C32C2E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v44, v44, v25 ; 0A58332C > v_mul_f32_e32 v25, 0x3e800000, v39 ; 10324EFF 3E800000 > v_cmp_gt_f32_e32 vcc, 0, v44 ; 7C085880 > v_cndmask_b32_e32 v25, v43, v25 ; 0032332B > v_sub_f32_e64 v43, |v44|, 
v25 ; D208012B 0002332C > v_cmp_eq_f32_e32 vcc, 0, v25 ; 7C043280 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v43 ; 7C025680 > v_cndmask_b32_e64 v44, v43, 1.0, vcc ; D200002C 01A9E52B > v_cmp_le_f32_e32 vcc, 0, v44 ; 7C065880 > v_mul_f32_e32 v44, 0x70000000, v44 ; 105858FF 70000000 > v_bfrev_b32_e32 v45, 15 ; 7E5A708F > v_cndmask_b32_e32 v44, v45, v44 ; 0058592D > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v25, v25 ; 7E325519 > v_mul_f32_e32 v44, v25, v43 ; 10585719 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mul_f32_e32 v43, 0x3e9eb852, v18 ; 105624FF 3E9EB852 > v_fma_f32 v45, v41, v21, v2 ; D296002D 040A2B29 > v_fma_f32 v46, v42, v21, v4 ; D296002E 04122B2A > v_fma_f32 v21, v40, v21, v1 ; D2960015 04062B28 > v_add_f32_e64 v25, 0, v44 clamp ; D2060819 00025880 > v_mul_f32_e32 v44, 0x3fa66666, v39 ; 10584EFF 3FA66666 > v_fma_f32 v45, v6, v43, v45 ; D296002D 04B65706 > v_fma_f32 v46, v7, v43, v46 ; D296002E 04BA5707 > v_fma_f32 v21, v5, v43, v21 ; D2960015 04565705 > v_fma_f32 v43, v45, v44, v8 ; D296002B 0422592D > v_fma_f32 v45, v46, v44, v9 ; D296002D 0426592E > v_mul_f32_e32 v46, s20, v45 ; 105C5A14 > v_mul_f32_e32 v47, s9, v45 ; 105E5A09 > v_mul_f32_e32 v45, s22, v45 ; 105A5A16 > v_fma_f32 v21, v21, v44, v0 ; D2960015 04025915 > v_mac_f32_e32 v46, s7, v43 ; 3E5C5607 > v_mac_f32_e32 v45, s11, v43 ; 3E5A560B > v_mac_f32_e32 v46, s2, v21 ; 3E5C2A02 > v_mac_f32_e32 v47, s0, v43 ; 3E5E5600 > v_mac_f32_e32 v45, s8, v21 ; 3E5A2A08 > v_mac_f32_e32 v47, s1, v21 ; 3E5E2A01 > v_add_f32_e32 v46, s3, v46 ; 065C5C03 > v_add_f32_e32 v43, s6, v45 ; 06565A06 > v_cmp_lt_f32_e32 vcc, 0, v46 ; 7C025C80 > v_add_f32_e32 v47, s21, v47 ; 065E5E15 > v_rcp_f32_e32 v51, v43 ; 7E66552B > v_cndmask_b32_e64 v45, v46, 1.0, vcc ; D200002D 01A9E52E > v_cmp_lt_f32_e32 vcc, 0, v47 ; 7C025E80 > v_bfrev_b32_e32 v49, 14 ; 7E62708E > v_cndmask_b32_e64 v48, v47, 1.0, vcc ; D2000030 01A9E52F > v_cmp_le_f32_e32 vcc, 0, v45 ; 7C065A80 > v_mul_f32_e32 v45, v49, v45 ; 105A5B31 > v_bfrev_b32_e32 v50, 15 ; 7E64708F > v_cndmask_b32_e32 v45, v50, v45 ; 005A5B32 > v_cmp_le_f32_e32 vcc, 0, v48 ; 7C066080 > v_mul_f32_e32 v48, v49, v48 ; 10606131 > v_cndmask_b32_e32 v48, v50, v48 ; 00606132 > v_cmp_eq_f32_e32 vcc, 0, v43 ; 7C045680 > v_mul_f32_e32 v43, v51, v46 ; 10565D33 > v_mul_f32_e32 v46, v51, v47 ; 105C5F33 > v_cndmask_b32_e32 v43, v43, v45 ; 00565B2B > v_cndmask_b32_e32 v45, v46, v48 ; 005A612E > v_fma_f32 v46, v43, 0.5, 0.5 ; D296002E 03C1E12B > v_fma_f32 v47, v45, -0.5, 0.5 ; D296002F 03C1E32D > image_sample v43, v[46:47], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C32B2E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v21, v43, v21 ; 0A2A2B2B > v_mul_f32_e32 v43, 0x3f266666, v39 ; 10564EFF 3F266666 > v_cmp_gt_f32_e32 vcc, 0, v21 ; 7C082A80 > v_cndmask_b32_e32 v43, v44, v43 ; 0056572C > v_sub_f32_e64 v44, |v21|, v43 ; D208012C 00025715 > v_cmp_eq_f32_e32 vcc, 0, v43 ; 7C045680 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v44 ; 7C025880 > v_cndmask_b32_e64 v21, v44, 1.0, vcc ; D2000015 01A9E52C > v_cmp_le_f32_e32 vcc, 0, v21 ; 7C062A80 > v_mul_f32_e32 v21, 0x70000000, v21 ; 102A2AFF 70000000 > v_bfrev_b32_e32 v45, 15 ; 7E5A708F > v_cndmask_b32_e32 v21, v45, v21 ; 002A2B2D > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v21, v43 ; 7E2A552B > v_mul_f32_e32 v21, v21, v44 ; 
102A5915 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_mul_f32_e32 v18, 0x3c23d70a, v18 ; 102424FF 3C23D70A > v_mac_f32_e32 v18, 0x3e8a3d71, v17 ; 3E2422FF 3E8A3D71 > v_fma_f32 v17, v41, v18, v2 ; D2960011 040A2529 > v_fma_f32 v43, v42, v18, v4 ; D296002B 0412252A > v_fma_f32 v18, v40, v18, v1 ; D2960012 04062528 > v_fma_f32 v17, v6, v22, v17 ; D2960011 04462D06 > v_fma_f32 v43, v7, v22, v43 ; D296002B 04AE2D07 > v_fma_f32 v18, v5, v22, v18 ; D2960012 044A2D05 > v_mul_f32_e32 v22, 0x40133333, v39 ; 102C4EFF 40133333 > v_fma_f32 v43, v43, v22, v9 ; D296002B 04262D2B > v_fma_f32 v17, v17, v22, v8 ; D2960011 04222D11 > v_mul_f32_e32 v44, s20, v43 ; 10585614 > v_mul_f32_e32 v45, s9, v43 ; 105A5609 > v_mul_f32_e32 v43, s22, v43 ; 10565616 > v_fma_f32 v18, v18, v22, v0 ; D2960012 04022D12 > v_mac_f32_e32 v44, s7, v17 ; 3E582207 > v_mac_f32_e32 v43, s11, v17 ; 3E56220B > v_mac_f32_e32 v44, s2, v18 ; 3E582402 > v_mac_f32_e32 v45, s0, v17 ; 3E5A2200 > v_mac_f32_e32 v43, s8, v18 ; 3E562408 > v_mac_f32_e32 v45, s1, v18 ; 3E5A2401 > v_add_f32_e32 v44, s3, v44 ; 06585803 > v_add_f32_e32 v17, s6, v43 ; 06225606 > v_cmp_lt_f32_e32 vcc, 0, v44 ; 7C025880 > v_add_f32_e32 v45, s21, v45 ; 065A5A15 > v_rcp_f32_e32 v49, v17 ; 7E625511 > v_cndmask_b32_e64 v43, v44, 1.0, vcc ; D200002B 01A9E52C > v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 > v_bfrev_b32_e32 v47, 14 ; 7E5E708E > v_cndmask_b32_e64 v46, v45, 1.0, vcc ; D200002E 01A9E52D > v_cmp_le_f32_e32 vcc, 0, v43 ; 7C065680 > v_mul_f32_e32 v43, v47, v43 ; 1056572F > v_bfrev_b32_e32 v48, 15 ; 7E60708F > v_cndmask_b32_e32 v43, v48, v43 ; 00565730 > v_mul_f32_e32 v47, v47, v46 ; 105E5D2F > v_cmp_le_f32_e32 vcc, 0, v46 ; 7C065C80 > v_cndmask_b32_e32 v46, v48, v47 ; 005C5F30 > v_cmp_eq_f32_e32 vcc, 0, v17 ; 7C042280 > v_mul_f32_e32 v44, v49, v44 ; 10585931 > v_mul_f32_e32 v45, v49, v45 ; 105A5B31 > v_cndmask_b32_e32 v43, v44, v43 ; 0056572C > v_cndmask_b32_e32 v44, v45, v46 ; 00585D2D > v_fma_f32 v45, v43, 0.5, 0.5 ; D296002D 03C1E12B > v_fma_f32 v46, v44, -0.5, 0.5 ; D296002E 03C1E32C > image_sample v43, v[45:46], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C32B2D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v43, v43, v18 ; 0A56252B > v_add_f32_e64 v17, 0, v21 clamp ; D2060811 00022A80 > v_mul_f32_e32 v21, 0x3f933333, v39 ; 102A4EFF 3F933333 > v_cmp_gt_f32_e32 vcc, 0, v43 ; 7C085680 > v_cndmask_b32_e32 v18, v22, v21 ; 00242B16 > v_sub_f32_e64 v21, |v43|, v18 ; D2080115 0002252B > v_cmp_eq_f32_e32 vcc, 0, v18 ; 7C042480 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 > v_cndmask_b32_e64 v22, v21, 1.0, vcc ; D2000016 01A9E515 > v_cmp_le_f32_e32 vcc, 0, v22 ; 7C062C80 > v_mul_f32_e32 v22, 0x70000000, v22 ; 102C2CFF 70000000 > v_bfrev_b32_e32 v43, 15 ; 7E56708F > v_cndmask_b32_e32 v22, v43, v22 ; 002C2D2B > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v18, v18 ; 7E245512 > v_mul_f32_e32 v22, v18, v21 ; 102C2B12 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v43, v42, v24, v4 ; D296002B 0412312A > v_add_f32_e64 v18, 0, v22 clamp ; D2060812 00022C80 > v_fma_f32 v22, v41, v24, v2 ; D2960016 040A3129 > v_mul_f32_e32 v21, 0x404ccccd, v39 ; 102A4EFF 404CCCCD > v_fma_f32 v24, v40, v24, v1 ; D2960018 04063128 > v_fma_f32 v43, v7, v23, v43 ; D296002B 04AE2F07 > v_fma_f32 v22, v6, v23, v22 ; D2960016 045A2F06 > v_fma_f32 v23, v5, v23, v24 ; D2960017 04622F05 > v_fma_f32 v24, v43, v21, v9 ; D2960018 04262B2B > v_fma_f32 v22, v22, v21, v8 
; D2960016 04222B16 > v_mul_f32_e32 v43, s20, v24 ; 10563014 > v_mul_f32_e32 v44, s9, v24 ; 10583009 > v_mul_f32_e32 v24, s22, v24 ; 10303016 > v_fma_f32 v23, v23, v21, v0 ; D2960017 04022B17 > v_mac_f32_e32 v43, s7, v22 ; 3E562C07 > v_mac_f32_e32 v24, s11, v22 ; 3E302C0B > v_mac_f32_e32 v43, s2, v23 ; 3E562E02 > v_mac_f32_e32 v44, s0, v22 ; 3E582C00 > v_mac_f32_e32 v24, s8, v23 ; 3E302E08 > v_mac_f32_e32 v44, s1, v23 ; 3E582E01 > v_add_f32_e32 v43, s3, v43 ; 06565603 > v_add_f32_e32 v22, s6, v24 ; 062C3006 > v_cmp_lt_f32_e32 vcc, 0, v43 ; 7C025680 > v_add_f32_e32 v44, s21, v44 ; 06585815 > v_rcp_f32_e32 v48, v22 ; 7E605516 > v_cndmask_b32_e64 v24, v43, 1.0, vcc ; D2000018 01A9E52B > v_cmp_lt_f32_e32 vcc, 0, v44 ; 7C025880 > v_bfrev_b32_e32 v46, 14 ; 7E5C708E > v_cndmask_b32_e64 v45, v44, 1.0, vcc ; D200002D 01A9E52C > v_cmp_le_f32_e32 vcc, 0, v24 ; 7C063080 > v_mul_f32_e32 v24, v46, v24 ; 1030312E > v_bfrev_b32_e32 v47, 15 ; 7E5E708F > v_cndmask_b32_e32 v24, v47, v24 ; 0030312F > v_mul_f32_e32 v46, v46, v45 ; 105C5B2E > v_cmp_le_f32_e32 vcc, 0, v45 ; 7C065A80 > v_cndmask_b32_e32 v45, v47, v46 ; 005A5D2F > v_mul_f32_e32 v43, v48, v43 ; 10565730 > v_cmp_eq_f32_e32 vcc, 0, v22 ; 7C042C80 > v_mul_f32_e32 v44, v48, v44 ; 10585930 > v_cndmask_b32_e32 v22, v43, v24 ; 002C312B > v_cndmask_b32_e32 v24, v44, v45 ; 00305B2C > v_fma_f32 v44, v22, 0.5, 0.5 ; D296002C 03C1E116 > v_fma_f32 v45, v24, -0.5, 0.5 ; D296002D 03C1E318 > image_sample v22, v[44:45], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3162C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v22, v22, v23 ; 0A2C2F16 > v_mul_f32_e32 v43, 0x3fcccccd, v39 ; 10564EFF 3FCCCCCD > v_cmp_gt_f32_e32 vcc, 0, v22 ; 7C082C80 > v_cndmask_b32_e32 v21, v21, v43 ; 002A5715 > v_sub_f32_e64 v22, |v22|, v21 ; D2080116 00022B16 > v_cmp_eq_f32_e32 vcc, 0, v21 ; 7C042A80 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 > v_cndmask_b32_e64 v23, v22, 1.0, vcc ; D2000017 01A9E516 > v_cmp_le_f32_e32 vcc, 0, v23 ; 7C062E80 > v_mul_f32_e32 v23, 0x70000000, v23 ; 102E2EFF 70000000 > v_bfrev_b32_e32 v24, 15 ; 7E30708F > v_cndmask_b32_e32 v23, v24, v23 ; 002E2F18 > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v21, v21 ; 7E2A5515 > v_mul_f32_e32 v23, v21, v22 ; 102E2D15 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v22, v41, v27, v2 ; D2960016 040A3729 > v_fma_f32 v24, v40, v27, v1 ; D2960018 04063728 > v_add_f32_e64 v21, 0, v23 clamp ; D2060815 00022E80 > v_fma_f32 v23, v42, v27, v4 ; D2960017 0412372A > v_fma_f32 v22, v6, v26, v22 ; D2960016 045A3506 > v_fma_f32 v23, v7, v26, v23 ; D2960017 045E3507 > v_fma_f32 v24, v5, v26, v24 ; D2960018 04623505 > v_mul_f32_e32 v26, 0x40833333, v39 ; 10344EFF 40833333 > v_fma_f32 v23, v23, v26, v9 ; D2960017 04263517 > v_fma_f32 v22, v22, v26, v8 ; D2960016 04223516 > v_mul_f32_e32 v27, s20, v23 ; 10362E14 > v_mul_f32_e32 v43, s9, v23 ; 10562E09 > v_mul_f32_e32 v23, s22, v23 ; 102E2E16 > v_fma_f32 v24, v24, v26, v0 ; D2960018 04023518 > v_mac_f32_e32 v27, s7, v22 ; 3E362C07 > v_mac_f32_e32 v23, s11, v22 ; 3E2E2C0B > v_mac_f32_e32 v27, s2, v24 ; 3E363002 > v_mac_f32_e32 v43, s0, v22 ; 3E562C00 > v_mac_f32_e32 v23, s8, v24 ; 3E2E3008 > v_mac_f32_e32 v43, s1, v24 ; 3E563001 > v_add_f32_e32 v27, s3, v27 ; 06363603 > v_add_f32_e32 v22, s6, v23 ; 062C2E06 > v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 > v_add_f32_e32 v43, s21, v43 ; 06565615 > v_rcp_f32_e32 v47, v22 ; 7E5E5516 > v_cndmask_b32_e64 
v23, v27, 1.0, vcc ; D2000017 01A9E51B > v_cmp_lt_f32_e32 vcc, 0, v43 ; 7C025680 > v_bfrev_b32_e32 v45, 14 ; 7E5A708E > v_cndmask_b32_e64 v44, v43, 1.0, vcc ; D200002C 01A9E52B > v_cmp_le_f32_e32 vcc, 0, v23 ; 7C062E80 > v_mul_f32_e32 v23, v45, v23 ; 102E2F2D > v_bfrev_b32_e32 v46, 15 ; 7E5C708F > v_cndmask_b32_e32 v23, v46, v23 ; 002E2F2E > v_mul_f32_e32 v45, v45, v44 ; 105A592D > v_cmp_le_f32_e32 vcc, 0, v44 ; 7C065880 > v_cndmask_b32_e32 v44, v46, v45 ; 00585B2E > v_mul_f32_e32 v27, v47, v27 ; 1036372F > v_cmp_eq_f32_e32 vcc, 0, v22 ; 7C042C80 > v_mul_f32_e32 v43, v47, v43 ; 1056572F > v_cndmask_b32_e32 v22, v27, v23 ; 002C2F1B > v_cndmask_b32_e32 v23, v43, v44 ; 002E592B > v_fma_f32 v43, v22, 0.5, 0.5 ; D296002B 03C1E116 > v_fma_f32 v44, v23, -0.5, 0.5 ; D296002C 03C1E317 > image_sample v22, v[43:44], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3162B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v23, v22, v24 ; 0A2E3116 > v_mul_f32_e32 v27, 0x40033333, v39 ; 10364EFF 40033333 > v_cmp_gt_f32_e32 vcc, 0, v23 ; 7C082E80 > v_cndmask_b32_e32 v22, v26, v27 ; 002C371A > v_sub_f32_e64 v23, |v23|, v22 ; D2080117 00022D17 > v_cmp_eq_f32_e32 vcc, 0, v22 ; 7C042C80 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 > v_cndmask_b32_e64 v24, v23, 1.0, vcc ; D2000018 01A9E517 > v_cmp_le_f32_e32 vcc, 0, v24 ; 7C063080 > v_mul_f32_e32 v24, 0x70000000, v24 ; 103030FF 70000000 > v_bfrev_b32_e32 v26, 15 ; 7E34708F > v_cndmask_b32_e32 v24, v26, v24 ; 0030311A > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v22, v22 ; 7E2C5516 > v_mul_f32_e32 v24, v22, v23 ; 10302F16 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v26, v42, v29, v4 ; D296001A 04123B2A > v_add_f32_e64 v22, 0, v24 clamp ; D2060816 00023080 > v_fma_f32 v24, v41, v29, v2 ; D2960018 040A3B29 > v_mul_f32_e32 v23, 0x40a9999a, v39 ; 102E4EFF 40A9999A > v_fma_f32 v26, v7, v28, v26 ; D296001A 046A3907 > v_fma_f32 v27, v40, v29, v1 ; D296001B 04063B28 > v_fma_f32 v24, v6, v28, v24 ; D2960018 04623906 > v_fma_f32 v26, v26, v23, v9 ; D296001A 04262F1A > v_fma_f32 v27, v5, v28, v27 ; D296001B 046E3905 > v_fma_f32 v24, v24, v23, v8 ; D2960018 04222F18 > v_mul_f32_e32 v28, s20, v26 ; 10383414 > v_mul_f32_e32 v29, s9, v26 ; 103A3409 > v_mul_f32_e32 v26, s22, v26 ; 10343416 > v_fma_f32 v27, v27, v23, v0 ; D296001B 04022F1B > v_mac_f32_e32 v28, s7, v24 ; 3E383007 > v_mac_f32_e32 v26, s11, v24 ; 3E34300B > v_mac_f32_e32 v28, s2, v27 ; 3E383602 > v_mac_f32_e32 v29, s0, v24 ; 3E3A3000 > v_mac_f32_e32 v26, s8, v27 ; 3E343608 > v_mac_f32_e32 v29, s1, v27 ; 3E3A3601 > v_add_f32_e32 v28, s3, v28 ; 06383803 > v_add_f32_e32 v24, s6, v26 ; 06303406 > v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 > v_add_f32_e32 v29, s21, v29 ; 063A3A15 > v_rcp_f32_e32 v46, v24 ; 7E5C5518 > v_cndmask_b32_e64 v26, v28, 1.0, vcc ; D200001A 01A9E51C > v_cmp_lt_f32_e32 vcc, 0, v29 ; 7C023A80 > v_bfrev_b32_e32 v44, 14 ; 7E58708E > v_cndmask_b32_e64 v43, v29, 1.0, vcc ; D200002B 01A9E51D > v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480 > v_mul_f32_e32 v26, v44, v26 ; 1034352C > v_bfrev_b32_e32 v45, 15 ; 7E5A708F > v_cndmask_b32_e32 v26, v45, v26 ; 0034352D > v_mul_f32_e32 v44, v44, v43 ; 1058572C > v_cmp_le_f32_e32 vcc, 0, v43 ; 7C065680 > v_cndmask_b32_e32 v43, v45, v44 ; 0056592D > v_mul_f32_e32 v28, v46, v28 ; 1038392E > v_cmp_eq_f32_e32 vcc, 0, v24 ; 7C043080 > v_mul_f32_e32 v29, v46, v29 ; 103A3B2E > v_cndmask_b32_e32 v24, v28, v26 ; 0030351C > 
v_cndmask_b32_e32 v26, v29, v43 ; 0034571D > v_fma_f32 v44, v26, -0.5, 0.5 ; D296002C 03C1E31A > v_fma_f32 v43, v24, 0.5, 0.5 ; D296002B 03C1E118 > image_sample v24, v[43:44], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3182B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v24, v24, v27 ; 0A303718 > v_mul_f32_e32 v28, 0x4029999a, v39 ; 10384EFF 4029999A > v_cmp_gt_f32_e32 vcc, 0, v24 ; 7C083080 > v_cndmask_b32_e32 v23, v23, v28 ; 002E3917 > v_sub_f32_e64 v24, |v24|, v23 ; D2080118 00022F18 > v_cmp_eq_f32_e32 vcc, 0, v23 ; 7C042E80 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 > v_cndmask_b32_e64 v26, v24, 1.0, vcc ; D200001A 01A9E518 > v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480 > v_mul_f32_e32 v26, 0x70000000, v26 ; 103434FF 70000000 > v_bfrev_b32_e32 v27, 15 ; 7E36708F > v_cndmask_b32_e32 v26, v27, v26 ; 0034351B > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v23, v23 ; 7E2E5517 > v_mul_f32_e32 v26, v23, v24 ; 10343117 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_add_f32_e64 v23, 0, v26 clamp ; D2060817 00023480 > v_fma_f32 v26, v42, v31, v4 ; D296001A 04123F2A > v_fma_f32 v24, v41, v31, v2 ; D2960018 040A3F29 > v_fma_f32 v26, v7, v30, v26 ; D296001A 046A3D07 > v_mul_f32_e32 v28, 0x40c33333, v39 ; 10384EFF 40C33333 > v_fma_f32 v27, v40, v31, v1 ; D296001B 04063F28 > v_fma_f32 v24, v6, v30, v24 ; D2960018 04623D06 > v_fma_f32 v26, v26, v28, v9 ; D296001A 0426391A > v_fma_f32 v27, v5, v30, v27 ; D296001B 046E3D05 > v_fma_f32 v24, v24, v28, v8 ; D2960018 04223918 > v_mul_f32_e32 v29, s20, v26 ; 103A3414 > v_mul_f32_e32 v30, s9, v26 ; 103C3409 > v_mul_f32_e32 v26, s22, v26 ; 10343416 > v_fma_f32 v27, v27, v28, v0 ; D296001B 0402391B > v_mac_f32_e32 v29, s7, v24 ; 3E3A3007 > v_mac_f32_e32 v26, s11, v24 ; 3E34300B > v_mac_f32_e32 v29, s2, v27 ; 3E3A3602 > v_mac_f32_e32 v30, s0, v24 ; 3E3C3000 > v_mac_f32_e32 v26, s8, v27 ; 3E343608 > v_mac_f32_e32 v30, s1, v27 ; 3E3C3601 > v_add_f32_e32 v29, s3, v29 ; 063A3A03 > v_add_f32_e32 v24, s6, v26 ; 06303406 > v_cmp_lt_f32_e32 vcc, 0, v29 ; 7C023A80 > v_add_f32_e32 v30, s21, v30 ; 063C3C15 > v_rcp_f32_e32 v45, v24 ; 7E5A5518 > v_cndmask_b32_e64 v26, v29, 1.0, vcc ; D200001A 01A9E51D > v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 > v_bfrev_b32_e32 v43, 14 ; 7E56708E > v_cndmask_b32_e64 v31, v30, 1.0, vcc ; D200001F 01A9E51E > v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480 > v_mul_f32_e32 v26, v43, v26 ; 1034352B > v_bfrev_b32_e32 v44, 15 ; 7E58708F > v_cndmask_b32_e32 v26, v44, v26 ; 0034352C > v_mul_f32_e32 v43, v43, v31 ; 10563F2B > v_cmp_le_f32_e32 vcc, 0, v31 ; 7C063E80 > v_cndmask_b32_e32 v31, v44, v43 ; 003E572C > v_mul_f32_e32 v29, v45, v29 ; 103A3B2D > v_cmp_eq_f32_e32 vcc, 0, v24 ; 7C043080 > v_mul_f32_e32 v30, v45, v30 ; 103C3D2D > v_cndmask_b32_e32 v24, v29, v26 ; 0030351D > v_cndmask_b32_e32 v26, v30, v31 ; 00343F1E > v_fma_f32 v30, v24, 0.5, 0.5 ; D296001E 03C1E118 > v_fma_f32 v31, v26, -0.5, 0.5 ; D296001F 03C1E31A > image_sample v24, v[30:31], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C3181E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v26, v24, v27 ; 0A343718 > v_mul_f32_e32 v29, 0x40433333, v39 ; 103A4EFF 40433333 > v_cmp_gt_f32_e32 vcc, 0, v26 ; 7C083480 > v_cndmask_b32_e32 v24, v28, v29 ; 00303B1C > v_sub_f32_e64 v26, |v26|, v24 ; D208011A 0002311A > v_cmp_eq_f32_e32 vcc, 0, v24 ; 7C043080 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, 
v26 ; 7C023480 > v_cndmask_b32_e64 v27, v26, 1.0, vcc ; D200001B 01A9E51A > v_cmp_le_f32_e32 vcc, 0, v27 ; 7C063680 > v_mul_f32_e32 v27, 0x70000000, v27 ; 103636FF 70000000 > v_bfrev_b32_e32 v28, 15 ; 7E38708F > v_cndmask_b32_e32 v27, v28, v27 ; 0036371C > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v24, v24 ; 7E305518 > v_mul_f32_e32 v27, v24, v26 ; 10363518 > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v28, v42, v34, v4 ; D296001C 0412452A > v_add_f32_e64 v24, 0, v27 clamp ; D2060818 00023680 > v_fma_f32 v27, v41, v34, v2 ; D296001B 040A4529 > v_mul_f32_e32 v26, 0x40f66666, v39 ; 10344EFF 40F66666 > v_fma_f32 v28, v7, v32, v28 ; D296001C 04724107 > v_fma_f32 v29, v40, v34, v1 ; D296001D 04064528 > v_fma_f32 v27, v6, v32, v27 ; D296001B 046E4106 > v_fma_f32 v28, v28, v26, v9 ; D296001C 0426351C > v_fma_f32 v29, v5, v32, v29 ; D296001D 04764105 > v_fma_f32 v27, v27, v26, v8 ; D296001B 0422351B > v_mul_f32_e32 v30, s20, v28 ; 103C3814 > v_mul_f32_e32 v31, s9, v28 ; 103E3809 > v_mul_f32_e32 v28, s22, v28 ; 10383816 > v_fma_f32 v29, v29, v26, v0 ; D296001D 0402351D > v_mac_f32_e32 v30, s7, v27 ; 3E3C3607 > v_mac_f32_e32 v28, s11, v27 ; 3E38360B > v_mac_f32_e32 v30, s2, v29 ; 3E3C3A02 > v_mac_f32_e32 v31, s0, v27 ; 3E3E3600 > v_mac_f32_e32 v28, s8, v29 ; 3E383A08 > v_mac_f32_e32 v31, s1, v29 ; 3E3E3A01 > v_add_f32_e32 v30, s3, v30 ; 063C3C03 > v_add_f32_e32 v27, s6, v28 ; 06363806 > v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 > v_add_f32_e32 v31, s21, v31 ; 063E3E15 > v_rcp_f32_e32 v44, v27 ; 7E58551B > v_cndmask_b32_e64 v28, v30, 1.0, vcc ; D200001C 01A9E51E > v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80 > v_bfrev_b32_e32 v34, 14 ; 7E44708E > v_cndmask_b32_e64 v32, v31, 1.0, vcc ; D2000020 01A9E51F > v_cmp_le_f32_e32 vcc, 0, v28 ; 7C063880 > v_mul_f32_e32 v28, v34, v28 ; 10383922 > v_bfrev_b32_e32 v43, 15 ; 7E56708F > v_cndmask_b32_e32 v28, v43, v28 ; 0038392B > v_mul_f32_e32 v34, v34, v32 ; 10444122 > v_cmp_le_f32_e32 vcc, 0, v32 ; 7C064080 > v_cndmask_b32_e32 v32, v43, v34 ; 0040452B > v_mul_f32_e32 v30, v44, v30 ; 103C3D2C > v_cmp_eq_f32_e32 vcc, 0, v27 ; 7C043680 > v_mul_f32_e32 v31, v44, v31 ; 103E3F2C > v_cndmask_b32_e32 v27, v30, v28 ; 0036391E > v_cndmask_b32_e32 v28, v31, v32 ; 0038411F > v_fma_f32 v32, v28, -0.5, 0.5 ; D2960020 03C1E31C > v_fma_f32 v31, v27, 0.5, 0.5 ; D296001F 03C1E11B > image_sample v27, v[31:32], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C31B1F > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v27, v27, v29 ; 0A363B1B > v_mul_f32_e32 v30, 0x40766666, v39 ; 103C4EFF 40766666 > v_cmp_gt_f32_e32 vcc, 0, v27 ; 7C083680 > v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A > v_sub_f32_e64 v27, |v27|, v26 ; D208011B 0002351B > v_cmp_eq_f32_e32 vcc, 0, v26 ; 7C043480 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 > v_cndmask_b32_e64 v28, v27, 1.0, vcc ; D200001C 01A9E51B > v_cmp_le_f32_e32 vcc, 0, v28 ; 7C063880 > v_mul_f32_e32 v28, 0x70000000, v28 ; 103838FF 70000000 > v_bfrev_b32_e32 v29, 15 ; 7E3A708F > v_cndmask_b32_e32 v28, v29, v28 ; 0038391D > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v26, v26 ; 7E34551A > v_mul_f32_e32 v28, v26, v27 ; 1038371A > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v29, v42, v37, v4 ; D296001D 04124B2A > v_add_f32_e64 v26, 0, v28 clamp ; D206081A 00023880 > v_fma_f32 v28, v41, v37, v2 ; D296001C 040A4B29 > v_mul_f32_e32 v27, 0x41080000, v39 ; 
10364EFF 41080000 > v_fma_f32 v29, v7, v35, v29 ; D296001D 04764707 > v_fma_f32 v30, v40, v37, v1 ; D296001E 04064B28 > v_fma_f32 v28, v6, v35, v28 ; D296001C 04724706 > v_fma_f32 v29, v29, v27, v9 ; D296001D 0426371D > v_fma_f32 v30, v5, v35, v30 ; D296001E 047A4705 > v_fma_f32 v28, v28, v27, v8 ; D296001C 0422371C > v_mul_f32_e32 v31, s20, v29 ; 103E3A14 > v_mul_f32_e32 v32, s9, v29 ; 10403A09 > v_mul_f32_e32 v29, s22, v29 ; 103A3A16 > v_fma_f32 v30, v30, v27, v0 ; D296001E 0402371E > v_mac_f32_e32 v31, s7, v28 ; 3E3E3807 > v_mac_f32_e32 v29, s11, v28 ; 3E3A380B > v_mac_f32_e32 v31, s2, v30 ; 3E3E3C02 > v_mac_f32_e32 v32, s0, v28 ; 3E403800 > v_mac_f32_e32 v29, s8, v30 ; 3E3A3C08 > v_mac_f32_e32 v32, s1, v30 ; 3E403C01 > v_add_f32_e32 v31, s3, v31 ; 063E3E03 > v_add_f32_e32 v28, s6, v29 ; 06383A06 > v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80 > v_add_f32_e32 v32, s21, v32 ; 06404015 > v_rcp_f32_e32 v43, v28 ; 7E56551C > v_cndmask_b32_e64 v29, v31, 1.0, vcc ; D200001D 01A9E51F > v_cmp_lt_f32_e32 vcc, 0, v32 ; 7C024080 > v_bfrev_b32_e32 v35, 14 ; 7E46708E > v_cndmask_b32_e64 v34, v32, 1.0, vcc ; D2000022 01A9E520 > v_cmp_le_f32_e32 vcc, 0, v29 ; 7C063A80 > v_mul_f32_e32 v29, v35, v29 ; 103A3B23 > v_bfrev_b32_e32 v37, 15 ; 7E4A708F > v_cndmask_b32_e32 v29, v37, v29 ; 003A3B25 > v_mul_f32_e32 v35, v35, v34 ; 10464523 > v_cmp_le_f32_e32 vcc, 0, v34 ; 7C064480 > v_cndmask_b32_e32 v34, v37, v35 ; 00444725 > v_mul_f32_e32 v31, v43, v31 ; 103E3F2B > v_cmp_eq_f32_e32 vcc, 0, v28 ; 7C043880 > v_mul_f32_e32 v32, v43, v32 ; 1040412B > v_cndmask_b32_e32 v28, v31, v29 ; 00383B1F > v_cndmask_b32_e32 v29, v32, v34 ; 003A4520 > v_fma_f32 v34, v28, 0.5, 0.5 ; D2960022 03C1E11C > v_fma_f32 v35, v29, -0.5, 0.5 ; D2960023 03C1E31D > image_sample v28, v[34:35], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C31C22 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v29, v28, v30 ; 0A3A3D1C > v_mul_f32_e32 v31, 0x40880000, v39 ; 103E4EFF 40880000 > v_cmp_gt_f32_e32 vcc, 0, v29 ; 7C083A80 > v_cndmask_b32_e32 v28, v27, v31 ; 00383F1B > v_sub_f32_e64 v29, |v29|, v28 ; D208011D 0002391D > v_cmp_eq_f32_e32 vcc, 0, v28 ; 7C043880 > s_and_saveexec_b64 s[4:5], vcc ; BE84246A > s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E > v_cmp_lt_f32_e32 vcc, 0, v29 ; 7C023A80 > v_cndmask_b32_e64 v27, v29, 1.0, vcc ; D200001B 01A9E51D > v_cmp_le_f32_e32 vcc, 0, v27 ; 7C063680 > v_mul_f32_e32 v27, 0x70000000, v27 ; 103636FF 70000000 > v_bfrev_b32_e32 v30, 15 ; 7E3C708F > v_cndmask_b32_e32 v27, v30, v27 ; 0036371E > s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 > s_xor_b64 exec, exec, s[4:5] ; 89FE047E > v_rcp_f32_e32 v27, v28 ; 7E36551C > v_mul_f32_e32 v27, v27, v29 ; 10363B1B > s_or_b64 exec, exec, s[4:5] ; 88FE047E > v_fma_f32 v4, v42, v38, v4 ; D2960004 04124D2A > v_fma_f32 v2, v41, v38, v2 ; D2960002 040A4D29 > v_mul_f32_e32 v28, 0x41200000, v39 ; 10384EFF 41200000 > v_fma_f32 v4, v7, v36, v4 ; D2960004 04124907 > v_fma_f32 v2, v6, v36, v2 ; D2960002 040A4906 > v_fma_f32 v1, v40, v38, v1 ; D2960001 04064D28 > v_fma_f32 v4, v4, v28, v9 ; D2960004 04263904 > v_fma_f32 v1, v5, v36, v1 ; D2960001 04064905 > v_fma_f32 v2, v2, v28, v8 ; D2960002 04223902 > v_mul_f32_e32 v7, s20, v4 ; 100E0814 > v_mul_f32_e32 v5, s9, v4 ; 100A0809 > v_mul_f32_e32 v4, s22, v4 ; 10080816 > v_mac_f32_e32 v7, s7, v2 ; 3E0E0407 > v_fma_f32 v1, v1, v28, v0 ; D2960001 04023901 > v_mac_f32_e32 v4, s11, v2 ; 3E08040B > v_mac_f32_e32 v5, s0, v2 ; 3E0A0400 > v_mac_f32_e32 v7, s2, v1 ; 3E0E0202 > v_mac_f32_e32 v4, s8, v1 ; 3E080208 > v_add_f32_e32 v0, s3, v7 ; 06000E03 > 
v_mac_f32_e32 v5, s1, v1 ; 3E0A0201 > v_add_f32_e32 v2, s6, v4 ; 06040806 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_add_f32_e32 v5, s21, v5 ; 060A0A15 > v_rcp_f32_e32 v9, v2 ; 7E125502 > v_cndmask_b32_e64 v4, v0, 1.0, vcc ; D2000004 01A9E500 > v_cmp_lt_f32_e32 vcc, 0, v5 ; 7C020A80 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_cndmask_b32_e64 v6, v5, 1.0, vcc ; D2000006 01A9E505 > v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880 > v_mul_f32_e32 v4, v7, v4 ; 10080907 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_cndmask_b32_e32 v4, v8, v4 ; 00080908 > v_cmp_le_f32_e32 vcc, 0, v6 ; 7C060C80 > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > v_cndmask_b32_e32 v6, v8, v6 ; 000C0D08 > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > v_mul_f32_e32 v0, v9, v0 ; 10000109 > v_mul_f32_e32 v2, v9, v5 ; 10040B09 > v_cndmask_b32_e32 v0, v0, v4 ; 00000900 > v_cndmask_b32_e32 v2, v2, v6 ; 00040D02 > v_fma_f32 v4, v0, 0.5, 0.5 ; D2960004 03C1E100 > v_fma_f32 v5, v2, -0.5, 0.5 ; D2960005 03C1E302 > image_sample v2, v[4:5], s[12:19], s[24:27] dmask:0x1 ; F0800100 00C30204 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v1, v2, v1 ; 0A020302 > v_mul_f32_e32 v4, 0x40a00000, v39 ; 10084EFF 40A00000 > v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 > v_cndmask_b32_e32 v2, v28, v4 ; 0004091C > v_sub_f32_e64 v4, |v1|, v2 ; D2080104 00020501 > v_add_f32_e64 v0, 0, v27 clamp ; D2060800 00023680 > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > s_and_saveexec_b64 s[0:1], vcc ; BE80246A > s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E > v_cmp_lt_f32_e32 vcc, 0, v4 ; 7C020880 > v_cndmask_b32_e64 v1, v4, 1.0, vcc ; D2000001 01A9E504 > v_cmp_le_f32_e32 vcc, 0, v1 ; 7C060280 > v_mul_f32_e32 v1, 0x70000000, v1 ; 100202FF 70000000 > v_bfrev_b32_e32 v5, 15 ; 7E0A708F > v_cndmask_b32_e32 v1, v5, v1 ; 00020305 > s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500 > s_xor_b64 exec, exec, s[0:1] ; 89FE007E > v_rcp_f32_e32 v1, v2 ; 7E025502 > v_mul_f32_e32 v1, v1, v4 ; 10020901 > s_or_b64 exec, exec, s[0:1] ; 88FE007E > v_add_f32_e32 v2, v3, v10 ; 06041503 > v_add_f32_e32 v4, v17, v25 ; 06083311 > v_add_f32_e32 v2, v2, v11 ; 06041702 > v_add_f32_e32 v4, v4, v18 ; 06082504 > v_add_f32_e32 v2, v12, v2 ; 0604050C > v_add_f32_e32 v4, v4, v21 ; 06082B04 > v_add_f32_e32 v2, v14, v2 ; 0604050E > v_add_f32_e32 v4, v4, v22 ; 06082D04 > v_add_f32_e32 v2, v15, v2 ; 0604050F > v_add_f32_e32 v4, v4, v23 ; 06082F04 > v_add_f32_e32 v2, v16, v2 ; 06040510 > v_add_f32_e32 v4, v4, v24 ; 06083104 > v_add_f32_e32 v2, v19, v2 ; 06040513 > v_add_f32_e32 v4, v4, v26 ; 06083504 > v_add_f32_e32 v2, v20, v2 ; 06040514 > v_add_f32_e32 v0, v4, v0 ; 06000104 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_add_f32_e32 v2, v2, v33 ; 06044302 > v_mov_b32_e32 v3, 0x3dcccccd ; 7E0602FF 3DCCCCCD > v_add_f32_e32 v0, v0, v1 ; 06000300 > v_mul_f32_e32 v2, v3, v2 ; 10040503 > v_mul_f32_e32 v0, v3, v0 ; 10000103 > v_mul_f32_e32 v2, v2, v2 ; 10040502 > v_mul_f32_e32 v1, v0, v0 ; 10020100 > v_min_f32_e32 v0, v2, v1 ; 1E000302 > v_mov_b32_e32 v2, 0 ; 7E040280 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 72 >VGPRS: 52 >Code Size: 8148 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main 
disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_fma_f32 v0, v3, s4, s4 ; D2960000 00100903 > v_fma_f32 v1, v4, -s0, s0 ; D2960001 40000104 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 88 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 > s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 > s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v2, s9 ; 7E040209 > v_mul_f32_e32 v2, s8, v2 ; 10040408 > v_mov_b32_e32 v3, s0 ; 7E060200 > v_mul_f32_e32 v3, s8, v3 ; 10060608 > v_rcp_f32_e32 v4, v2 ; 7E085502 > v_rcp_f32_e32 v5, v3 ; 7E0A5503 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > v_cmp_eq_f32_e64 s[0:1], 0, v3 ; D0040000 00020680 > v_mul_f32_e32 v3, 0, v5 ; 10060A80 > v_mul_f32_e32 v2, -4.0, v4 ; 100408F7 > v_bfrev_b32_e32 v6, 15 ; 7E0C708F > v_mul_f32_e32 v5, -2.0, v4 ; 100A08F5 > v_cndmask_b32_e64 v3, v3, 0, s[0:1] ; D2000003 00010103 > v_cndmask_b32_e32 v5, v5, v6 ; 000A0D05 > v_cndmask_b32_e32 v2, v2, v6 ; 00040D02 > v_add_f32_e32 v6, v4, v4 ; 060C0904 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_mul_f32_e32 v4, 4.0, v4 ; 100808F6 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s4, s4, s19 ; 87041304 > v_cndmask_b32_e32 v4, v4, v7 ; 00080F04 > v_add_f32_e32 v9, v1, v3 ; 06120701 > v_add_f32_e32 v8, v0, v5 ; 06100B00 > v_cndmask_b32_e32 v6, v6, v7 ; 000C0F06 > v_add_f32_e32 v3, v0, v4 ; 06060900 > v_add_f32_e32 v4, v0, v2 ; 06080500 > image_sample v[7:8], v[8:9], s[12:19], s[4:7] dmask:0x3 ; F0800300 00230708 > v_mov_b32_e32 v5, v9 ; 7E0A0309 > v_mov_b32_e32 v2, 0x3e5a2cb9 ; 7E0402FF 3E5A2CB9 > image_sample v[4:5], v[4:5], s[12:19], s[4:7] dmask:0x3 ; F0800300 00230404 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v7, v2, v7 ; 100E0F02 > v_mov_b32_e32 v10, 0x3e1f13ce ; 7E1402FF 3E1F13CE > v_mul_f32_e32 v8, v2, v8 ; 10101102 > v_add_f32_e32 v6, v0, v6 ; 060C0D00 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v4, v4, v10, v7 ; D2960004 041E1504 > v_fma_f32 v5, v5, v10, v8 ; D2960005 04221505 > image_sample v[0:1], v[0:1], s[12:19], s[4:7] dmask:0x3 ; F0800300 00230000 > v_mov_b32_e32 v7, 0x3e86cab6 ; 7E0E02FF 3E86CAB6 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v0, v7, v4 ; D2960000 04120F00 > v_fma_f32 v1, v1, v7, v5 ; D2960001 04160F01 > v_mov_b32_e32 v7, v9 ; 7E0E0309 > image_sample v[5:6], v[6:7], s[12:19], s[4:7] dmask:0x3 ; F0800300 00230506 > v_mov_b32_e32 v4, v9 
; 7E080309 > image_sample v[3:4], v[3:4], s[12:19], s[4:7] dmask:0x3 ; F0800300 00230303 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_fma_f32 v0, v5, v2, v0 ; D2960000 04020505 > v_fma_f32 v1, v6, v2, v1 ; D2960001 04060506 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, v10, v0 ; D2960000 04021503 > v_mov_b32_e32 v2, 0x3e4ccccd ; 7E0402FF 3E4CCCCD > v_fma_f32 v1, v4, v10, v1 ; D2960001 04061504 > v_fma_f32 v0, v0, v0, v2 ; D2960000 040A0100 > v_fma_f32 v1, v1, v1, v2 ; D2960001 040A0301 > v_min_f32_e32 v0, 1.0, v0 ; 1E0000F2 > v_mov_b32_e32 v2, 0xbe4ccccd ; 7E0402FF BE4CCCCD > v_min_f32_e32 v1, 1.0, v1 ; 1E0202F2 > v_add_f32_e32 v0, v2, v0 ; 06000102 > v_add_f32_e32 v1, v2, v1 ; 06020302 > v_mov_b32_e32 v2, 0x3fa00000 ; 7E0402FF 3FA00000 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_sqrt_f32_e32 v0, v0 ; 7E006700 > v_sqrt_f32_e32 v1, v1 ; 7E026701 > v_mov_b32_e32 v2, 0 ; 7E040280 > v_mov_b32_e32 v3, 0 ; 7E060280 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 440 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_fma_f32 v0, v3, s4, s4 ; D2960000 00100903 > v_fma_f32 v1, v4, -s0, s0 ; D2960001 40000104 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 88 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 > s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 > s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v2, s9 ; 7E040209 > v_mul_f32_e32 v2, s8, v2 ; 10040408 > v_mov_b32_e32 v3, s0 ; 7E060200 > v_mul_f32_e32 v3, s8, v3 ; 10060608 > v_rcp_f32_e32 v4, v2 ; 7E085502 > v_rcp_f32_e32 v5, v3 ; 7E0A5503 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > v_mul_f32_e32 v2, 0, v4 ; 10040880 > v_cmp_eq_f32_e64 s[0:1], 0, v3 ; D0040000 00020680 > v_mul_f32_e32 v3, -4.0, v5 ; 10060AF7 > v_bfrev_b32_e32 v6, 15 ; 7E0C708F > v_mul_f32_e32 v4, -2.0, v5 ; 10080AF5 > v_cndmask_b32_e64 v3, 
v3, v6, s[0:1] ; D2000003 00020D03 > v_cndmask_b32_e64 v4, v4, v6, s[0:1] ; D2000004 00020D04 > v_add_f32_e32 v6, v5, v5 ; 060C0B05 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_mul_f32_e32 v5, 4.0, v5 ; 100A0AF6 > v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 > v_cndmask_b32_e64 v5, v5, v7, s[0:1] ; D2000005 00020F05 > v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06 > v_add_f32_e32 v7, v0, v2 ; 060E0500 > v_add_f32_e32 v8, v1, v4 ; 06100901 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s4, s4, s19 ; 87041304 > image_sample v[8:11], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230807 > v_mov_b32_e32 v4, 0x3e5a2cb9 ; 7E0802FF 3E5A2CB9 > v_add_f32_e32 v3, v1, v3 ; 06060701 > v_add_f32_e32 v2, v1, v6 ; 06040D01 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v4, v8 ; 100C1104 > v_mov_b32_e32 v8, v3 ; 7E100303 > image_sample v[14:17], v[0:1], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230E00 > v_mul_f32_e32 v9, v4, v9 ; 10121304 > v_mul_f32_e32 v10, v4, v10 ; 10141504 > v_mul_f32_e32 v11, v4, v11 ; 10161704 > image_sample v[18:21], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00231207 > v_mov_b32_e32 v0, 0x3e1f13ce ; 7E0002FF 3E1F13CE > v_add_f32_e32 v5, v1, v5 ; 060A0B01 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v18, v0, v6 ; D2960001 041A0112 > v_fma_f32 v3, v19, v0, v9 ; D2960003 04260113 > v_mov_b32_e32 v9, 0x3e86cab6 ; 7E1202FF 3E86CAB6 > v_fma_f32 v6, v20, v0, v10 ; D2960006 042A0114 > v_fma_f32 v8, v21, v0, v11 ; D2960008 042E0115 > v_fma_f32 v1, v14, v9, v1 ; D2960001 0406130E > v_fma_f32 v3, v15, v9, v3 ; D2960003 040E130F > v_fma_f32 v6, v16, v9, v6 ; D2960006 041A1310 > v_fma_f32 v9, v17, v9, v8 ; D2960009 04221311 > v_mov_b32_e32 v8, v2 ; 7E100302 > image_sample v[14:17], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230E07 > v_mov_b32_e32 v8, v5 ; 7E100305 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v15, v4, v3 ; D2960002 040E090F > v_fma_f32 v3, v16, v4, v6 ; D2960003 041A0910 > v_fma_f32 v1, v14, v4, v1 ; D2960001 0406090E > image_sample v[5:8], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230507 > v_fma_f32 v4, v17, v4, v9 ; D2960004 04260911 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v5, v0, v1 ; D2960001 04060105 > v_fma_f32 v5, v6, v0, v2 ; D2960005 040A0106 > v_fma_f32 v2, v7, v0, v3 ; D2960002 040E0107 > v_fma_f32 v3, v8, v0, v4 ; D2960003 04120108 > v_mov_b32_e32 v0, 0x3e4ccccd ; 7E0002FF 3E4CCCCD > v_fma_f32 v1, v1, v1, v0 ; D2960001 04020301 > v_fma_f32 v0, v5, v5, v0 ; D2960000 04020B05 > v_min_f32_e32 v1, 1.0, v1 ; 1E0202F2 > v_mov_b32_e32 v4, 0xbe4ccccd ; 7E0802FF BE4CCCCD > v_min_f32_e32 v0, 1.0, v0 ; 1E0000F2 > v_add_f32_e32 v1, v4, v1 ; 06020304 > v_add_f32_e32 v0, v4, v0 ; 06000104 > v_mov_b32_e32 v4, 0x3fa00000 ; 7E0802FF 3FA00000 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_mul_f32_e32 v4, v4, v0 ; 10080104 > v_sqrt_f32_e32 v0, v1 ; 7E006701 > v_sqrt_f32_e32 v1, v4 ; 7E026704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 24 >Code Size: 516 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; 
C08C0304 > s_load_dwordx4 s[12:15], s[2:3], 0xc ; C086030C > s_load_dwordx4 s[16:19], s[10:11], 0x0 ; C0880B00 > s_load_dwordx4 s[20:23], s[2:3], 0x8 ; C08A0308 > s_load_dwordx4 s[28:31], s[10:11], 0x4 ; C08E0B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s1, s[24:27], 0x0 ; C2009900 > s_buffer_load_dword s0, s[12:15], 0x66 ; C2000D66 > buffer_load_format_xyzw v[7:10], v4, s[16:19], 0 idxen ; E00C2000 80040704 > s_buffer_load_dword s5, s[24:27], 0x1 ; C2029901 > s_buffer_load_dword s2, s[24:27], 0x2 ; C2011902 > s_buffer_load_dword s4, s[24:27], 0x3 ; C2021903 > s_buffer_load_dword s3, s[24:27], 0x4 ; C2019904 > s_buffer_load_dword s9, s[24:27], 0x5 ; C2049905 > s_buffer_load_dword s6, s[24:27], 0x6 ; C2031906 > s_buffer_load_dword s8, s[24:27], 0x7 ; C2041907 > s_buffer_load_dword s7, s[24:27], 0x8 ; C2039908 > s_buffer_load_dword s15, s[24:27], 0x9 ; C2079909 > s_buffer_load_dword s12, s[24:27], 0xa ; C206190A > s_buffer_load_dword s14, s[24:27], 0xb ; C207190B > s_buffer_load_dword s13, s[24:27], 0xc ; C206990C > s_buffer_load_dword s18, s[24:27], 0xd ; C209190D > s_buffer_load_dword s16, s[24:27], 0xe ; C208190E > s_buffer_load_dword s17, s[24:27], 0xf ; C208990F > s_load_dwordx4 s[24:27], s[10:11], 0x8 ; C08C0B08 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[28:31], 0 idxen ; E00C2000 80070A05 > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v6, s[24:27], 0 idxen ; E00C2000 80060306 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v5 ; 10020B00 > v_mul_f32_e32 v4, v0, v4 ; 10080900 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v4, v4 ; 7E081104 > v_mul_f32_e32 v0, v0, v3 ; 10000700 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_lshlrev_b32_e32 v4, 5, v4 ; 34080885 > v_or_b32_e32 v5, 16, v1 ; 380A0290 > v_or_b32_e32 v6, 28, v1 ; 380C029C > v_or_b32_e32 v13, 20, v1 ; 381A0294 > v_or_b32_e32 v14, 24, v1 ; 381C0298 > buffer_load_dword v16, v4, s[20:23], 0 offen ; E0301000 80051004 > buffer_load_dword v13, v13, s[20:23], 0 offen ; E0301000 80050D0D > buffer_load_dword v15, v1, s[20:23], 0 offen ; E0301000 80050F01 > buffer_load_dword v5, v5, s[20:23], 0 offen ; E0301000 80050505 > buffer_load_dword v6, v6, s[20:23], 0 offen ; E0301000 80050606 > buffer_load_dword v14, v14, s[20:23], 0 offen ; E0301000 80050E0E > v_or_b32_e32 v20, 4, v1 ; 38280284 > v_or_b32_e32 v3, 16, v4 ; 38060890 > v_or_b32_e32 v17, 28, v4 ; 3822089C > v_or_b32_e32 v18, 20, v4 ; 38240894 > v_or_b32_e32 v19, 24, v4 ; 38260898 > v_or_b32_e32 v1, 8, v1 ; 38020288 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v21, 4, v4 ; 382A0884 > v_or_b32_e32 v4, 8, v4 ; 38080888 > buffer_load_dword v1, v1, s[20:23], 0 offen ; E0301000 80050101 > buffer_load_dword v18, v18, s[20:23], 0 offen ; E0301000 80051212 > buffer_load_dword v3, v3, s[20:23], 0 offen ; E0301000 80050303 > buffer_load_dword v17, v17, s[20:23], 0 offen ; E0301000 80051111 > buffer_load_dword v19, v19, s[20:23], 0 offen ; E0301000 80051313 > v_or_b32_e32 v27, 8, v0 ; 38360088 > buffer_load_dword v4, v4, s[20:23], 0 offen ; E0301000 80050404 > buffer_load_dword v27, v27, s[20:23], 0 offen ; E0301000 80051B1B > v_or_b32_e32 v22, 16, v0 ; 382C0090 > v_or_b32_e32 v23, 28, v0 ; 382E009C > v_or_b32_e32 v24, 24, v0 ; 38300098 > v_or_b32_e32 v25, 20, v0 ; 38320094 > buffer_load_dword v21, v21, s[20:23], 0 offen ; E0301000 80051515 > buffer_load_dword v25, v25, s[20:23], 0 offen ; E0301000 80051919 > 
buffer_load_dword v20, v20, s[20:23], 0 offen ; E0301000 80051414 > buffer_load_dword v22, v22, s[20:23], 0 offen ; E0301000 80051616 > buffer_load_dword v23, v23, s[20:23], 0 offen ; E0301000 80051717 > buffer_load_dword v24, v24, s[20:23], 0 offen ; E0301000 80051818 > v_or_b32_e32 v26, 4, v0 ; 38340084 > buffer_load_dword v0, v0, s[20:23], 0 offen ; E0301000 80050000 > buffer_load_dword v26, v26, s[20:23], 0 offen ; E0301000 80051A1A > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v16, v16, v11 ; 10201710 > v_mul_f32_e32 v32, v13, v13 ; 10401B0D > v_mac_f32_e32 v16, v15, v10 ; 3E20150F > v_mul_f32_e32 v15, v6, v5 ; 101E0B06 > v_mul_f32_e32 v28, v6, v14 ; 10381D06 > v_mul_f32_e32 v6, v6, v13 ; 100C1B06 > v_fma_f32 v29, v5, v13, v28 ; D296001D 04721B05 > v_fma_f32 v28, v5, v13, -v28 ; D296001C 84721B05 > v_fma_f32 v30, v5, v14, -v6 ; D296001E 841A1D05 > v_mac_f32_e32 v6, v5, v14 ; 3E0C1D05 > v_mul_f32_e32 v5, v5, v5 ; 100A0B05 > v_fma_f32 v31, v13, v14, -v15 ; D296001F 843E1D0D > v_fma_f32 v15, v13, v14, v15 ; D296000F 043E1D0D > v_mac_f32_e32 v32, v14, v14 ; 3E401D0E > v_mad_f32 v14, v14, v14, v5 ; D282000E 04161D0E > v_mac_f32_e32 v5, v13, v13 ; 3E0A1B0D > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v1, v1, v10 ; 10021501 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v13, v17, v3 ; 101A0711 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mul_f32_e32 v33, v17, v19 ; 10422711 > v_mul_f32_e32 v17, v17, v18 ; 10222511 > v_mul_f32_e32 v37, v18, v18 ; 104A2512 > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v1, v4, v11 ; 3E021704 > v_fma_f32 v34, v3, v18, v33 ; D2960022 04862503 > v_fma_f32 v35, v3, v19, -v17 ; D2960023 84462703 > v_fma_f32 v33, v3, v18, -v33 ; D2960021 84862503 > v_mac_f32_e32 v17, v3, v19 ; 3E222703 > v_mul_f32_e32 v3, v3, v3 ; 10060703 > v_fma_f32 v36, v18, v19, -v13 ; D2960024 84362712 > v_fma_f32 v13, v18, v19, v13 ; D296000D 04362712 > v_mac_f32_e32 v37, v19, v19 ; 3E4A2713 > v_mad_f32 v19, v19, v19, v3 ; D2820013 040E2713 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mac_f32_e32 v1, v27, v12 ; 3E02191B > v_mul_f32_e32 v27, v11, v17 ; 1036230B > v_mac_f32_e32 v3, v18, v18 ; 3E062512 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mul_f32_e32 v18, v21, v11 ; 10241715 > v_mac_f32_e32 v27, v11, v17 ; 3E36230B > v_fma_f32 v17, -v19, 2.0, 1.0 ; D2960011 23C9E913 > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v18, v20, v10 ; 3E241514 > v_fma_f32 v5, -v5, 2.0, 1.0 ; D2960005 23C9E905 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mul_f32_e32 v20, v23, v22 ; 10282D17 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v21, v23, v24 ; 102A3117 > v_mul_f32_e32 v23, v23, v25 ; 102E3317 > v_fma_f32 v14, -v14, 2.0, 1.0 ; D296000E 23C9E90E > v_mul_f32_e32 v17, v17, v11 ; 10221711 > v_mul_f32_e32 v41, v25, v25 ; 10523319 > v_fma_f32 v38, v22, v25, v21 ; D2960026 04563316 > v_fma_f32 v39, v22, v24, -v23 ; D2960027 845E3116 > v_mac_f32_e32 v17, v14, v10 ; 3E22150E > v_fma_f32 v14, -v37, 2.0, 1.0 ; D296000E 23C9E925 > v_fma_f32 v21, v22, v25, -v21 ; D2960015 84563316 > v_mac_f32_e32 v23, v22, v24 ; 3E2E3116 > v_mul_f32_e32 v22, v22, v22 ; 102C2D16 > v_fma_f32 v3, -v3, 2.0, 1.0 ; D2960003 23C9E903 > v_mul_f32_e32 v5, v5, v10 ; 100A1505 > v_fma_f32 v40, v25, v24, -v20 ; D2960028 84523119 > v_fma_f32 v20, v25, v24, v20 ; D2960014 04523119 > v_mac_f32_e32 v41, v24, v24 ; 3E523118 > v_mad_f32 v24, v24, v24, v22 ; D2820018 045A3118 > v_mac_f32_e32 v5, v3, v11 ; 3E0A1703 > v_fma_f32 v3, -v32, 2.0, 1.0 ; D2960003 23C9E920 > v_mul_f32_e32 v14, v14, v11 ; 101C170E > v_mac_f32_e32 v22, v25, v25 ; 3E2C3319 > v_mac_f32_e32 v14, v3, v10 ; 3E1C1503 > 
v_fma_f32 v3, -v24, 2.0, 1.0 ; D2960003 23C9E918 > v_mul_f32_e32 v4, v11, v34 ; 1008450B > v_mac_f32_e32 v17, v3, v12 ; 3E221903 > v_fma_f32 v3, -v22, 2.0, 1.0 ; D2960003 23C9E916 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v16, v0, v12 ; 3E201900 > v_mul_f32_e32 v0, v11, v36 ; 1000490B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v18, v26, v12 ; 3E24191A > v_mul_f32_e32 v26, v10, v15 ; 10341F0A > v_mac_f32_e32 v5, v3, v12 ; 3E0A1903 > v_fma_f32 v3, -v41, 2.0, 1.0 ; D2960003 23C9E929 > v_mul_f32_e32 v19, v10, v29 ; 10263B0A > v_mac_f32_e32 v4, v11, v34 ; 3E08450B > v_mac_f32_e32 v26, v10, v15 ; 3E341F0A > v_mul_f32_e32 v15, v11, v33 ; 101E430B > v_mac_f32_e32 v14, v3, v12 ; 3E1C1903 > v_mac_f32_e32 v0, v11, v36 ; 3E00490B > v_mul_f32_e32 v3, v10, v31 ; 10063F0A > v_mul_f32_e32 v24, v12, v38 ; 10304D0C > v_mac_f32_e32 v4, 2.0, v19 ; 3E0826F4 > v_mul_f32_e32 v25, v10, v30 ; 10323D0A > v_mac_f32_e32 v0, 2.0, v3 ; 3E0006F4 > v_mul_f32_e32 v22, v12, v40 ; 102C510C > v_mac_f32_e32 v15, v11, v33 ; 3E1E430B > v_mul_f32_e32 v28, v10, v28 ; 1038390A > v_mac_f32_e32 v4, 2.0, v24 ; 3E0830F4 > v_mul_f32_e32 v3, v8, v17 ; 10062308 > v_mul_f32_e32 v13, v11, v13 ; 101A1B0B > v_mul_f32_e32 v6, v10, v6 ; 100C0D0A > v_mac_f32_e32 v25, v10, v30 ; 3E323D0A > v_mul_f32_e32 v10, v12, v21 ; 10142B0C > v_mac_f32_e32 v15, 2.0, v28 ; 3E1E38F4 > v_mac_f32_e32 v0, 2.0, v22 ; 3E002CF4 > v_mac_f32_e32 v3, v7, v4 ; 3E060907 > v_mac_f32_e32 v3, v9, v0 ; 3E060109 > v_mul_f32_e32 v11, v11, v35 ; 1016470B > v_mul_f32_e32 v20, v12, v20 ; 1028290C > v_mac_f32_e32 v26, 2.0, v13 ; 3E341AF4 > v_mac_f32_e32 v15, 2.0, v10 ; 3E1E14F4 > v_mul_f32_e32 v29, v12, v39 ; 103A4F0C > v_mac_f32_e32 v25, 2.0, v11 ; 3E3216F4 > v_mac_f32_e32 v26, 2.0, v20 ; 3E3428F4 > v_add_f32_e32 v0, v18, v3 ; 06000712 > v_mul_f32_e32 v3, v8, v15 ; 10061F08 > v_mac_f32_e32 v27, 2.0, v6 ; 3E360CF4 > v_mul_f32_e32 v12, v12, v23 ; 10182F0C > v_mac_f32_e32 v25, 2.0, v29 ; 3E323AF4 > v_mul_f32_e32 v4, v8, v26 ; 10083508 > v_mac_f32_e32 v27, 2.0, v12 ; 3E3618F4 > v_mac_f32_e32 v3, v7, v14 ; 3E061D07 > v_mac_f32_e32 v4, v7, v25 ; 3E083307 > v_mac_f32_e32 v3, v9, v27 ; 3E063709 > v_mac_f32_e32 v4, v9, v5 ; 3E080B09 > v_add_f32_e32 v3, v16, v3 ; 06060710 > v_mul_f32_e32 v6, s15, v0 ; 100C000F > v_mul_f32_e32 v5, s9, v0 ; 100A0009 > v_add_f32_e32 v1, v1, v4 ; 06020901 > v_mul_f32_e32 v4, s5, v0 ; 10080005 > v_mac_f32_e32 v6, s7, v3 ; 3E0C0607 > v_mul_f32_e32 v0, s18, v0 ; 10000012 > v_mac_f32_e32 v4, s1, v3 ; 3E080601 > v_mac_f32_e32 v5, s3, v3 ; 3E0A0603 > v_mac_f32_e32 v6, s12, v1 ; 3E0C020C > v_mac_f32_e32 v0, s13, v3 ; 3E00060D > v_mac_f32_e32 v4, s2, v1 ; 3E080202 > v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 > v_mac_f32_e32 v0, s16, v1 ; 3E000210 > v_add_f32_e32 v6, s14, v6 ; 060C0C0E > v_add_f32_e32 v4, s4, v4 ; 06080804 > v_add_f32_e32 v5, s8, v5 ; 060A0A08 > v_add_f32_e32 v0, s17, v0 ; 06000011 > v_min_f32_e32 v1, s0, v6 ; 1E020C00 > exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 44 >Code Size: 1136 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 
>******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; C08C0304 > s_load_dwordx4 s[12:15], s[2:3], 0xc ; C086030C > s_load_dwordx4 s[16:19], s[10:11], 0x0 ; C0880B00 > s_load_dwordx4 s[20:23], s[2:3], 0x8 ; C08A0308 > s_load_dwordx4 s[28:31], s[10:11], 0x4 ; C08E0B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s1, s[24:27], 0x0 ; C2009900 > s_buffer_load_dword s0, s[12:15], 0x66 ; C2000D66 > buffer_load_format_xyzw v[7:10], v4, s[16:19], 0 idxen ; E00C2000 80040704 > s_buffer_load_dword s5, s[24:27], 0x1 ; C2029901 > s_buffer_load_dword s2, s[24:27], 0x2 ; C2011902 > s_buffer_load_dword s4, s[24:27], 0x3 ; C2021903 > s_buffer_load_dword s3, s[24:27], 0x4 ; C2019904 > s_buffer_load_dword s9, s[24:27], 0x5 ; C2049905 > s_buffer_load_dword s6, s[24:27], 0x6 ; C2031906 > s_buffer_load_dword s8, s[24:27], 0x7 ; C2041907 > s_buffer_load_dword s7, s[24:27], 0x8 ; C2039908 > s_buffer_load_dword s15, s[24:27], 0x9 ; C2079909 > s_buffer_load_dword s12, s[24:27], 0xa ; C206190A > s_buffer_load_dword s14, s[24:27], 0xb ; C207190B > s_buffer_load_dword s13, s[24:27], 0xc ; C206990C > s_buffer_load_dword s18, s[24:27], 0xd ; C209190D > s_buffer_load_dword s16, s[24:27], 0xe ; C208190E > s_buffer_load_dword s17, s[24:27], 0xf ; C208990F > s_load_dwordx4 s[24:27], s[10:11], 0x8 ; C08C0B08 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[28:31], 0 idxen ; E00C2000 80070A05 > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v6, s[24:27], 0 idxen ; E00C2000 80060306 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v5 ; 10020B00 > v_mul_f32_e32 v4, v0, v4 ; 10080900 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v4, v4 ; 7E081104 > v_mul_f32_e32 v0, v0, v3 ; 10000700 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_lshlrev_b32_e32 v4, 5, v4 ; 34080885 > v_or_b32_e32 v5, 16, v1 ; 380A0290 > v_or_b32_e32 v6, 28, v1 ; 380C029C > v_or_b32_e32 v13, 20, v1 ; 381A0294 > v_or_b32_e32 v14, 24, v1 ; 381C0298 > buffer_load_dword v16, v4, s[20:23], 0 offen ; E0301000 80051004 > buffer_load_dword v13, v13, s[20:23], 0 offen ; E0301000 80050D0D > buffer_load_dword v15, v1, s[20:23], 0 offen ; E0301000 80050F01 > buffer_load_dword v5, v5, s[20:23], 0 offen ; E0301000 80050505 > buffer_load_dword v6, v6, s[20:23], 0 offen ; E0301000 80050606 > buffer_load_dword v14, v14, s[20:23], 0 offen ; E0301000 80050E0E > v_or_b32_e32 v20, 4, v1 ; 38280284 > v_or_b32_e32 v3, 16, v4 ; 38060890 > v_or_b32_e32 v17, 28, v4 ; 3822089C > v_or_b32_e32 v18, 20, v4 ; 38240894 > v_or_b32_e32 v19, 24, v4 ; 38260898 > v_or_b32_e32 v1, 8, v1 ; 38020288 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v21, 4, v4 ; 382A0884 > v_or_b32_e32 v4, 8, v4 ; 38080888 > buffer_load_dword v1, v1, s[20:23], 0 offen ; E0301000 80050101 > buffer_load_dword v18, v18, s[20:23], 0 offen ; E0301000 80051212 > buffer_load_dword v3, v3, s[20:23], 0 offen ; E0301000 80050303 > buffer_load_dword v17, v17, s[20:23], 0 offen ; E0301000 80051111 > buffer_load_dword v19, v19, s[20:23], 0 offen ; E0301000 80051313 > v_or_b32_e32 v27, 8, v0 ; 38360088 > buffer_load_dword v4, v4, s[20:23], 0 offen ; E0301000 80050404 > buffer_load_dword v27, v27, s[20:23], 0 offen ; E0301000 80051B1B > v_or_b32_e32 v22, 16, v0 ; 382C0090 > 
v_or_b32_e32 v23, 28, v0 ; 382E009C > v_or_b32_e32 v24, 24, v0 ; 38300098 > v_or_b32_e32 v25, 20, v0 ; 38320094 > buffer_load_dword v21, v21, s[20:23], 0 offen ; E0301000 80051515 > buffer_load_dword v25, v25, s[20:23], 0 offen ; E0301000 80051919 > buffer_load_dword v20, v20, s[20:23], 0 offen ; E0301000 80051414 > buffer_load_dword v22, v22, s[20:23], 0 offen ; E0301000 80051616 > buffer_load_dword v23, v23, s[20:23], 0 offen ; E0301000 80051717 > buffer_load_dword v24, v24, s[20:23], 0 offen ; E0301000 80051818 > v_or_b32_e32 v26, 4, v0 ; 38340084 > buffer_load_dword v0, v0, s[20:23], 0 offen ; E0301000 80050000 > buffer_load_dword v26, v26, s[20:23], 0 offen ; E0301000 80051A1A > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v16, v16, v11 ; 10201710 > v_mul_f32_e32 v32, v13, v13 ; 10401B0D > v_mac_f32_e32 v16, v15, v10 ; 3E20150F > v_mul_f32_e32 v15, v6, v5 ; 101E0B06 > v_mul_f32_e32 v28, v6, v14 ; 10381D06 > v_mul_f32_e32 v6, v6, v13 ; 100C1B06 > v_fma_f32 v29, v5, v13, v28 ; D296001D 04721B05 > v_fma_f32 v28, v5, v13, -v28 ; D296001C 84721B05 > v_fma_f32 v30, v5, v14, -v6 ; D296001E 841A1D05 > v_mac_f32_e32 v6, v5, v14 ; 3E0C1D05 > v_mul_f32_e32 v5, v5, v5 ; 100A0B05 > v_fma_f32 v31, v13, v14, -v15 ; D296001F 843E1D0D > v_fma_f32 v15, v13, v14, v15 ; D296000F 043E1D0D > v_mac_f32_e32 v32, v14, v14 ; 3E401D0E > v_mad_f32 v14, v14, v14, v5 ; D282000E 04161D0E > v_mac_f32_e32 v5, v13, v13 ; 3E0A1B0D > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v1, v1, v10 ; 10021501 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v13, v17, v3 ; 101A0711 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mul_f32_e32 v33, v17, v19 ; 10422711 > v_mul_f32_e32 v17, v17, v18 ; 10222511 > v_mul_f32_e32 v37, v18, v18 ; 104A2512 > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v1, v4, v11 ; 3E021704 > v_fma_f32 v34, v3, v18, v33 ; D2960022 04862503 > v_fma_f32 v35, v3, v19, -v17 ; D2960023 84462703 > v_fma_f32 v33, v3, v18, -v33 ; D2960021 84862503 > v_mac_f32_e32 v17, v3, v19 ; 3E222703 > v_mul_f32_e32 v3, v3, v3 ; 10060703 > v_fma_f32 v36, v18, v19, -v13 ; D2960024 84362712 > v_fma_f32 v13, v18, v19, v13 ; D296000D 04362712 > v_mac_f32_e32 v37, v19, v19 ; 3E4A2713 > v_mad_f32 v19, v19, v19, v3 ; D2820013 040E2713 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mac_f32_e32 v1, v27, v12 ; 3E02191B > v_mul_f32_e32 v27, v11, v17 ; 1036230B > v_mac_f32_e32 v3, v18, v18 ; 3E062512 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mul_f32_e32 v18, v21, v11 ; 10241715 > v_mac_f32_e32 v27, v11, v17 ; 3E36230B > v_fma_f32 v17, -v19, 2.0, 1.0 ; D2960011 23C9E913 > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v18, v20, v10 ; 3E241514 > v_fma_f32 v5, -v5, 2.0, 1.0 ; D2960005 23C9E905 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mul_f32_e32 v20, v23, v22 ; 10282D17 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v21, v23, v24 ; 102A3117 > v_mul_f32_e32 v23, v23, v25 ; 102E3317 > v_fma_f32 v14, -v14, 2.0, 1.0 ; D296000E 23C9E90E > v_mul_f32_e32 v17, v17, v11 ; 10221711 > v_mul_f32_e32 v41, v25, v25 ; 10523319 > v_fma_f32 v38, v22, v25, v21 ; D2960026 04563316 > v_fma_f32 v39, v22, v24, -v23 ; D2960027 845E3116 > v_mac_f32_e32 v17, v14, v10 ; 3E22150E > v_fma_f32 v14, -v37, 2.0, 1.0 ; D296000E 23C9E925 > v_fma_f32 v21, v22, v25, -v21 ; D2960015 84563316 > v_mac_f32_e32 v23, v22, v24 ; 3E2E3116 > v_mul_f32_e32 v22, v22, v22 ; 102C2D16 > v_fma_f32 v3, -v3, 2.0, 1.0 ; D2960003 23C9E903 > v_mul_f32_e32 v5, v5, v10 ; 100A1505 > v_fma_f32 v40, v25, v24, -v20 ; D2960028 84523119 > v_fma_f32 v20, v25, v24, v20 ; D2960014 04523119 > v_mac_f32_e32 v41, v24, v24 ; 3E523118 > v_mad_f32 
v24, v24, v24, v22 ; D2820018 045A3118 > v_mac_f32_e32 v5, v3, v11 ; 3E0A1703 > v_fma_f32 v3, -v32, 2.0, 1.0 ; D2960003 23C9E920 > v_mul_f32_e32 v14, v14, v11 ; 101C170E > v_mac_f32_e32 v22, v25, v25 ; 3E2C3319 > v_mac_f32_e32 v14, v3, v10 ; 3E1C1503 > v_fma_f32 v3, -v24, 2.0, 1.0 ; D2960003 23C9E918 > v_mul_f32_e32 v4, v11, v34 ; 1008450B > v_mac_f32_e32 v17, v3, v12 ; 3E221903 > v_fma_f32 v3, -v22, 2.0, 1.0 ; D2960003 23C9E916 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v16, v0, v12 ; 3E201900 > v_mul_f32_e32 v0, v11, v36 ; 1000490B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v18, v26, v12 ; 3E24191A > v_mul_f32_e32 v26, v10, v15 ; 10341F0A > v_mac_f32_e32 v5, v3, v12 ; 3E0A1903 > v_fma_f32 v3, -v41, 2.0, 1.0 ; D2960003 23C9E929 > v_mul_f32_e32 v19, v10, v29 ; 10263B0A > v_mac_f32_e32 v4, v11, v34 ; 3E08450B > v_mac_f32_e32 v26, v10, v15 ; 3E341F0A > v_mul_f32_e32 v15, v11, v33 ; 101E430B > v_mac_f32_e32 v14, v3, v12 ; 3E1C1903 > v_mac_f32_e32 v0, v11, v36 ; 3E00490B > v_mul_f32_e32 v3, v10, v31 ; 10063F0A > v_mul_f32_e32 v24, v12, v38 ; 10304D0C > v_mac_f32_e32 v4, 2.0, v19 ; 3E0826F4 > v_mul_f32_e32 v25, v10, v30 ; 10323D0A > v_mac_f32_e32 v0, 2.0, v3 ; 3E0006F4 > v_mul_f32_e32 v22, v12, v40 ; 102C510C > v_mac_f32_e32 v15, v11, v33 ; 3E1E430B > v_mul_f32_e32 v28, v10, v28 ; 1038390A > v_mac_f32_e32 v4, 2.0, v24 ; 3E0830F4 > v_mul_f32_e32 v3, v8, v17 ; 10062308 > v_mul_f32_e32 v13, v11, v13 ; 101A1B0B > v_mul_f32_e32 v6, v10, v6 ; 100C0D0A > v_mac_f32_e32 v25, v10, v30 ; 3E323D0A > v_mul_f32_e32 v10, v12, v21 ; 10142B0C > v_mac_f32_e32 v15, 2.0, v28 ; 3E1E38F4 > v_mac_f32_e32 v0, 2.0, v22 ; 3E002CF4 > v_mac_f32_e32 v3, v7, v4 ; 3E060907 > v_mac_f32_e32 v3, v9, v0 ; 3E060109 > v_mul_f32_e32 v11, v11, v35 ; 1016470B > v_mul_f32_e32 v20, v12, v20 ; 1028290C > v_mac_f32_e32 v26, 2.0, v13 ; 3E341AF4 > v_mac_f32_e32 v15, 2.0, v10 ; 3E1E14F4 > v_mul_f32_e32 v29, v12, v39 ; 103A4F0C > v_mac_f32_e32 v25, 2.0, v11 ; 3E3216F4 > v_mac_f32_e32 v26, 2.0, v20 ; 3E3428F4 > v_add_f32_e32 v0, v18, v3 ; 06000712 > v_mul_f32_e32 v3, v8, v15 ; 10061F08 > v_mac_f32_e32 v27, 2.0, v6 ; 3E360CF4 > v_mul_f32_e32 v12, v12, v23 ; 10182F0C > v_mac_f32_e32 v25, 2.0, v29 ; 3E323AF4 > v_mul_f32_e32 v4, v8, v26 ; 10083508 > v_mac_f32_e32 v27, 2.0, v12 ; 3E3618F4 > v_mac_f32_e32 v3, v7, v14 ; 3E061D07 > v_mac_f32_e32 v4, v7, v25 ; 3E083307 > v_mac_f32_e32 v3, v9, v27 ; 3E063709 > v_mac_f32_e32 v4, v9, v5 ; 3E080B09 > v_add_f32_e32 v3, v16, v3 ; 06060710 > v_mul_f32_e32 v6, s15, v0 ; 100C000F > v_mul_f32_e32 v5, s9, v0 ; 100A0009 > v_add_f32_e32 v1, v1, v4 ; 06020901 > v_mul_f32_e32 v4, s5, v0 ; 10080005 > v_mac_f32_e32 v6, s7, v3 ; 3E0C0607 > v_mul_f32_e32 v0, s18, v0 ; 10000012 > v_mac_f32_e32 v4, s1, v3 ; 3E080601 > v_mac_f32_e32 v5, s3, v3 ; 3E0A0603 > v_mac_f32_e32 v6, s12, v1 ; 3E0C020C > v_mac_f32_e32 v0, s13, v3 ; 3E00060D > v_mac_f32_e32 v4, s2, v1 ; 3E080202 > v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 > v_mac_f32_e32 v0, s16, v1 ; 3E000210 > v_add_f32_e32 v6, s14, v6 ; 060C0C0E > v_add_f32_e32 v4, s4, v4 ; 06080804 > v_add_f32_e32 v5, s8, v5 ; 060A0A08 > v_add_f32_e32 v0, s17, v0 ; 06000011 > v_min_f32_e32 v1, s0, v6 ; 1E020C00 > exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 44 >Code Size: 1136 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, 
v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 387 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %33, float 
%34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %44, float %46, 20 > %48 = add i32 %15, %12 > %49 = bitcast i32 %48 to float > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %47, float %49, 21 > %51 = add i32 %15, %12 > %52 = bitcast i32 %51 to float > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %50, float %52, 22 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float }> %53 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[8:11], s[10:11], 0xc ; C0840B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[8:11], v4, s[4:7], 0 idxen ; E00C2000 80010804 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[11:14], v5, s[12:15], 0 idxen ; E00C2000 80030B05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[13:16], v7, s[8:11], 0 idxen ; E00C2000 80020D07 > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v15 ; 10021F00 > v_mul_f32_e32 v6, v0, v14 ; 100C1D00 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 > v_mul_f32_e32 v0, v0, v13 ; 10001B00 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_lshlrev_b32_e32 v6, 5, v6 ; 340C0C85 > v_or_b32_e32 v7, 16, v1 ; 380E0290 > v_or_b32_e32 v14, 28, v1 ; 381C029C > v_or_b32_e32 v15, 24, v1 ; 381E0298 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_dword v16, v6, s[4:7], 0 offen ; E0301000 80011006 > v_or_b32_e32 v18, 20, v1 ; 38240294 > buffer_load_dword v18, v18, s[4:7], 0 offen ; E0301000 80011212 > buffer_load_dword v7, v7, s[4:7], 0 offen ; E0301000 80010707 > buffer_load_dword v14, v14, s[4:7], 0 offen ; E0301000 80010E0E > buffer_load_dword v15, v15, s[4:7], 0 offen ; E0301000 80010F0F > buffer_load_dword v17, v1, s[4:7], 0 offen ; 
E0301000 80011101 > v_or_b32_e32 v22, 4, v1 ; 382C0284 > v_or_b32_e32 v13, 16, v6 ; 381A0C90 > v_or_b32_e32 v19, 28, v6 ; 38260C9C > v_or_b32_e32 v20, 24, v6 ; 38280C98 > v_or_b32_e32 v21, 20, v6 ; 382A0C94 > v_or_b32_e32 v1, 8, v1 ; 38020288 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v23, 4, v6 ; 382E0C84 > v_or_b32_e32 v6, 8, v6 ; 380C0C88 > buffer_load_dword v1, v1, s[4:7], 0 offen ; E0301000 80010101 > buffer_load_dword v21, v21, s[4:7], 0 offen ; E0301000 80011515 > buffer_load_dword v13, v13, s[4:7], 0 offen ; E0301000 80010D0D > buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 > buffer_load_dword v20, v20, s[4:7], 0 offen ; E0301000 80011414 > v_or_b32_e32 v29, 8, v0 ; 383A0088 > buffer_load_dword v6, v6, s[4:7], 0 offen ; E0301000 80010606 > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > v_or_b32_e32 v24, 16, v0 ; 38300090 > v_or_b32_e32 v25, 28, v0 ; 3832009C > v_or_b32_e32 v26, 24, v0 ; 38340098 > v_or_b32_e32 v27, 20, v0 ; 38360094 > v_or_b32_e32 v28, 4, v0 ; 38380084 > buffer_load_dword v23, v23, s[4:7], 0 offen ; E0301000 80011717 > buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 > buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B > buffer_load_dword v22, v22, s[4:7], 0 offen ; E0301000 80011616 > buffer_load_dword v24, v24, s[4:7], 0 offen ; E0301000 80011818 > buffer_load_dword v25, v25, s[4:7], 0 offen ; E0301000 80011919 > buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0xc ; C080030C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[4:7], 0x1 ; C2048501 > s_buffer_load_dword s8, s[4:7], 0x0 ; C2040500 > s_buffer_load_dword s10, s[4:7], 0x2 ; C2050502 > s_buffer_load_dword s11, s[4:7], 0x3 ; C2058503 > s_buffer_load_dword s13, s[4:7], 0x5 ; C2068505 > s_buffer_load_dword s17, s[4:7], 0x9 ; C2088509 > s_buffer_load_dword s21, s[4:7], 0xd ; C20A850D > s_buffer_load_dword s16, s[4:7], 0x8 ; C2080508 > s_buffer_load_dword s20, s[4:7], 0xc ; C20A050C > s_buffer_load_dword s12, s[4:7], 0x4 ; C2060504 > s_buffer_load_dword s18, s[4:7], 0xa ; C209050A > s_buffer_load_dword s22, s[4:7], 0xe ; C20B050E > s_buffer_load_dword s14, s[4:7], 0x6 ; C2070506 > s_buffer_load_dword s15, s[4:7], 0x7 ; C2078507 > s_buffer_load_dword s19, s[4:7], 0xb ; C209850B > s_buffer_load_dword s4, s[4:7], 0xf ; C202050F > s_buffer_load_dword s0, s[0:3], 0x66 ; C2000166 > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v16, v16, v4 ; 10200910 > v_mul_f32_e32 v34, v18, v18 ; 10442512 > v_mul_f32_e32 v30, v14, v15 ; 103C1F0E > v_mac_f32_e32 v16, v17, v3 ; 3E200711 > v_mul_f32_e32 v17, v14, v7 ; 10220F0E > v_mul_f32_e32 v14, v14, v18 ; 101C250E > v_fma_f32 v31, v7, v18, v30 ; D296001F 047A2507 > v_fma_f32 v32, v7, v15, -v14 ; D2960020 843A1F07 > v_fma_f32 v30, v7, v18, -v30 ; D296001E 847A2507 > v_mac_f32_e32 v14, v7, v15 ; 3E1C1F07 > v_mul_f32_e32 v7, v7, v7 ; 100E0F07 > v_fma_f32 v33, v18, v15, -v17 ; D2960021 84461F12 > v_fma_f32 v17, v18, v15, v17 ; D2960011 04461F12 > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v1, v1, v3 ; 10020701 > v_mac_f32_e32 v34, v15, v15 ; 3E441F0F > v_mad_f32 v15, v15, v15, v7 ; D282000F 041E1F0F > v_mac_f32_e32 v7, v18, v18 ; 3E0E2512 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v18, v19, v13 ; 10241B13 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mul_f32_e32 v35, v19, v20 ; 10462913 > v_mul_f32_e32 v19, v19, v21 ; 10262B13 > s_waitcnt vmcnt(9) 
; BF8C0F79 > v_mac_f32_e32 v1, v6, v4 ; 3E020906 > v_fma_f32 v36, v13, v21, v35 ; D2960024 048E2B0D > v_fma_f32 v37, v13, v20, -v19 ; D2960025 844E290D > v_mac_f32_e32 v19, v13, v20 ; 3E26290D > v_mul_f32_e32 v39, v21, v21 ; 104E2B15 > v_mul_f32_e32 v6, v4, v36 ; 100C4904 > v_fma_f32 v35, v13, v21, -v35 ; D2960023 848E2B0D > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mac_f32_e32 v1, v29, v5 ; 3E020B1D > v_mul_f32_e32 v29, v4, v19 ; 103A2704 > v_mul_f32_e32 v13, v13, v13 ; 101A1B0D > v_fma_f32 v38, v21, v20, -v18 ; D2960026 844A2915 > v_mac_f32_e32 v29, v4, v19 ; 3E3A2704 > v_fma_f32 v18, v21, v20, v18 ; D2960012 044A2915 > v_mac_f32_e32 v39, v20, v20 ; 3E4E2914 > v_mad_f32 v20, v20, v20, v13 ; D2820014 04362914 > v_mac_f32_e32 v6, v4, v36 ; 3E0C4904 > v_mul_f32_e32 v19, v3, v31 ; 10263F03 > v_mac_f32_e32 v6, 2.0, v19 ; 3E0C26F4 > v_fma_f32 v19, -v20, 2.0, 1.0 ; D2960013 23C9E914 > v_mac_f32_e32 v13, v21, v21 ; 3E1A2B15 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mul_f32_e32 v21, v23, v4 ; 102A0917 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_mac_f32_e32 v16, v0, v5 ; 3E200B00 > v_mul_f32_e32 v0, v4, v38 ; 10004D04 > v_fma_f32 v15, -v15, 2.0, 1.0 ; D296000F 23C9E90F > v_mul_f32_e32 v19, v19, v4 ; 10260913 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v21, v22, v3 ; 3E2A0716 > v_fma_f32 v7, -v7, 2.0, 1.0 ; D2960007 23C9E907 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v22, v25, v24 ; 102C3119 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v23, v25, v26 ; 102E3519 > v_mul_f32_e32 v25, v25, v27 ; 10323719 > v_mac_f32_e32 v19, v15, v3 ; 3E26070F > v_mac_f32_e32 v0, v4, v38 ; 3E004D04 > v_mul_f32_e32 v15, v3, v33 ; 101E4303 > v_mul_f32_e32 v43, v27, v27 ; 1056371B > v_fma_f32 v40, v24, v27, v23 ; D2960028 045E3718 > v_fma_f32 v41, v24, v26, -v25 ; D2960029 84663518 > v_mac_f32_e32 v0, 2.0, v15 ; 3E001EF4 > v_fma_f32 v15, -v39, 2.0, 1.0 ; D296000F 23C9E927 > v_fma_f32 v23, v24, v27, -v23 ; D2960017 845E3718 > v_mac_f32_e32 v25, v24, v26 ; 3E323518 > v_mul_f32_e32 v24, v24, v24 ; 10303118 > v_fma_f32 v13, -v13, 2.0, 1.0 ; D296000D 23C9E90D > v_mul_f32_e32 v7, v7, v3 ; 100E0707 > v_fma_f32 v42, v27, v26, -v22 ; D296002A 845A351B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v21, v28, v5 ; 3E2A0B1C > v_mul_f32_e32 v28, v3, v17 ; 10382303 > v_mac_f32_e32 v7, v13, v4 ; 3E0E090D > v_fma_f32 v22, v27, v26, v22 ; D2960016 045A351B > v_mac_f32_e32 v43, v26, v26 ; 3E56351A > v_mad_f32 v26, v26, v26, v24 ; D282001A 0462351A > v_mac_f32_e32 v24, v27, v27 ; 3E30371B > v_mul_f32_e32 v27, v3, v32 ; 10364103 > v_fma_f32 v13, -v34, 2.0, 1.0 ; D296000D 23C9E922 > v_mul_f32_e32 v15, v15, v4 ; 101E090F > v_mac_f32_e32 v28, v3, v17 ; 3E382303 > v_mac_f32_e32 v15, v13, v3 ; 3E1E070D > v_mac_f32_e32 v27, v3, v32 ; 3E364103 > v_mul_f32_e32 v13, v3, v30 ; 101A3D03 > v_mul_f32_e32 v3, v3, v14 ; 10061D03 > v_mac_f32_e32 v29, 2.0, v3 ; 3E3A06F4 > v_mul_f32_e32 v3, v5, v40 ; 10065105 > v_mac_f32_e32 v6, 2.0, v3 ; 3E0C06F4 > v_fma_f32 v3, -v26, 2.0, 1.0 ; D2960003 23C9E91A > v_mac_f32_e32 v19, v3, v5 ; 3E260B03 > v_mul_f32_e32 v3, v5, v42 ; 10065505 > v_mul_f32_e32 v17, v4, v35 ; 10224704 > v_mac_f32_e32 v0, 2.0, v3 ; 3E0006F4 > v_fma_f32 v3, -v24, 2.0, 1.0 ; D2960003 23C9E918 > v_mac_f32_e32 v17, v4, v35 ; 3E224704 > v_mac_f32_e32 v7, v3, v5 ; 3E0E0B03 > v_fma_f32 v3, -v43, 2.0, 1.0 ; D2960003 23C9E92B > v_mac_f32_e32 v17, 2.0, v13 ; 3E221AF4 > v_mac_f32_e32 v15, v3, v5 ; 3E1E0B03 > v_mul_f32_e32 v3, v5, v23 ; 10062F05 > v_mul_f32_e32 v13, v4, v18 ; 101A2504 > v_mac_f32_e32 v17, 2.0, v3 ; 3E2206F4 > v_mul_f32_e32 v3, v5, v25 ; 10063305 > 
v_mac_f32_e32 v29, 2.0, v3 ; 3E3A06F4 > v_mul_f32_e32 v3, v9, v19 ; 10062709 > v_mul_f32_e32 v14, v5, v22 ; 101C2D05 > v_mac_f32_e32 v28, 2.0, v13 ; 3E381AF4 > v_mul_f32_e32 v4, v4, v37 ; 10084B04 > v_mac_f32_e32 v3, v8, v6 ; 3E060D08 > v_mul_f32_e32 v6, v9, v17 ; 100C2309 > v_mul_f32_e32 v5, v5, v41 ; 100A5305 > v_mac_f32_e32 v27, 2.0, v4 ; 3E3608F4 > v_mac_f32_e32 v28, 2.0, v14 ; 3E381CF4 > v_mac_f32_e32 v6, v8, v15 ; 3E0C1F08 > v_mac_f32_e32 v3, v10, v0 ; 3E06010A > v_mac_f32_e32 v27, 2.0, v5 ; 3E360AF4 > v_mul_f32_e32 v4, v9, v28 ; 10083909 > v_mac_f32_e32 v4, v8, v27 ; 3E083708 > v_add_f32_e32 v0, v21, v3 ; 06000715 > v_mac_f32_e32 v6, v10, v29 ; 3E0C3B0A > v_add_f32_e32 v3, v16, v6 ; 06060D10 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v6, s9, v0 ; 100C0009 > v_mac_f32_e32 v4, v10, v7 ; 3E080F0A > v_mac_f32_e32 v6, s8, v3 ; 3E0C0608 > v_add_f32_e32 v1, v1, v4 ; 06020901 > v_mac_f32_e32 v6, s10, v1 ; 3E0C020A > v_mul_f32_e32 v5, s13, v0 ; 100A000D > v_add_f32_e32 v4, s11, v6 ; 06080C0B > v_mul_f32_e32 v6, s17, v0 ; 100C0011 > v_mul_f32_e32 v0, s21, v0 ; 10000015 > v_mac_f32_e32 v6, s16, v3 ; 3E0C0610 > v_mac_f32_e32 v0, s20, v3 ; 3E000614 > v_mac_f32_e32 v5, s12, v3 ; 3E0A060C > v_mac_f32_e32 v6, s18, v1 ; 3E0C0212 > v_mac_f32_e32 v0, s22, v1 ; 3E000216 > v_mac_f32_e32 v5, s14, v1 ; 3E0A020E > v_add_f32_e32 v6, s19, v6 ; 060C0C13 > v_add_f32_e32 v0, s4, v0 ; 06000004 > v_add_f32_e32 v5, s15, v5 ; 060A0A0F > v_min_f32_e32 v1, s0, v6 ; 1E020C00 > exp 15, 32, 0, 0, 0, v11, v12, v6, v0 ; F800020F 00060C0B > exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 44 >Code Size: 1172 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v2, v2, 3, 0, [m0] ; C8080302 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v2, [v2], v3, 3, 0, [m0] ; C8090303 > image_sample v0, v[0:1], s[12:19], s[0:3] dmask:0x8 ; F0800800 00030000 > v_mov_b32_e32 v1, 0xbec0c0c1 ; 7E0202FF BEC0C0C1 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v0, v0, v1 ; 06000300 > v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 > v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 > v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 > v_rcp_f32_e32 v1, v2 ; 7E025502 > v_cmp_lt_f32_e64 s[0:1], 0, v4 ; D0020000 00020880 > v_cndmask_b32_e64 v0, v4, 1.0, s[0:1] ; D2000000 0001E504 > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > v_cmp_le_f32_e64 s[0:1], 0, v0 ; D0060000 00020080 > v_mul_f32_e32 v0, 0x70000000, v0 ; 100000FF 70000000 > v_bfrev_b32_e32 v2, 15 ; 7E04708F > v_cndmask_b32_e64 v0, v2, v0, s[0:1] ; D2000000 00020102 > v_mul_f32_e32 v1, v1, v4 ; 10020901 > v_cndmask_b32_e32 v0, v1, v0 ; 00000101 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 > v_mov_b32_e32 v3, v0 ; 7E060300 >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** 
>SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 180 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >
>Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; C08C0304 > s_load_dwordx4 s[12:15], s[2:3], 0xc ; C086030C > s_load_dwordx4 s[16:19], s[10:11], 0x0 ; C0880B00 > s_load_dwordx4 s[20:23], s[2:3], 0x8 ; C08A0308 > s_load_dwordx4 s[28:31], s[10:11], 0x4 ; C08E0B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s1, s[24:27], 0x0 ; C2009900 > s_buffer_load_dword s0, s[12:15], 0x66 ; C2000D66 > buffer_load_format_xyzw v[7:10], v4, s[16:19], 0 idxen ; E00C2000 80040704 > s_buffer_load_dword s5, s[24:27], 0x1 ; C2029901 > s_buffer_load_dword s2, s[24:27], 0x2 ; C2011902 > s_buffer_load_dword s4, s[24:27],
0x3 ; C2021903 > s_buffer_load_dword s3, s[24:27], 0x4 ; C2019904 > s_buffer_load_dword s9, s[24:27], 0x5 ; C2049905 > s_buffer_load_dword s6, s[24:27], 0x6 ; C2031906 > s_buffer_load_dword s8, s[24:27], 0x7 ; C2041907 > s_buffer_load_dword s7, s[24:27], 0x8 ; C2039908 > s_buffer_load_dword s15, s[24:27], 0x9 ; C2079909 > s_buffer_load_dword s12, s[24:27], 0xa ; C206190A > s_buffer_load_dword s14, s[24:27], 0xb ; C207190B > s_buffer_load_dword s13, s[24:27], 0xc ; C206990C > s_buffer_load_dword s18, s[24:27], 0xd ; C209190D > s_buffer_load_dword s16, s[24:27], 0xe ; C208190E > s_buffer_load_dword s17, s[24:27], 0xf ; C208990F > s_load_dwordx4 s[24:27], s[10:11], 0x8 ; C08C0B08 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[28:31], 0 idxen ; E00C2000 80070A05 > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v6, s[24:27], 0 idxen ; E00C2000 80060306 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v5 ; 10020B00 > v_mul_f32_e32 v4, v0, v4 ; 10080900 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_cvt_i32_f32_e32 v4, v4 ; 7E081104 > v_mul_f32_e32 v0, v0, v3 ; 10000700 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_lshlrev_b32_e32 v4, 5, v4 ; 34080885 > v_or_b32_e32 v5, 16, v1 ; 380A0290 > v_or_b32_e32 v6, 28, v1 ; 380C029C > v_or_b32_e32 v13, 20, v1 ; 381A0294 > v_or_b32_e32 v14, 24, v1 ; 381C0298 > buffer_load_dword v16, v4, s[20:23], 0 offen ; E0301000 80051004 > buffer_load_dword v13, v13, s[20:23], 0 offen ; E0301000 80050D0D > buffer_load_dword v15, v1, s[20:23], 0 offen ; E0301000 80050F01 > buffer_load_dword v5, v5, s[20:23], 0 offen ; E0301000 80050505 > buffer_load_dword v6, v6, s[20:23], 0 offen ; E0301000 80050606 > buffer_load_dword v14, v14, s[20:23], 0 offen ; E0301000 80050E0E > v_or_b32_e32 v20, 4, v1 ; 38280284 > v_or_b32_e32 v3, 16, v4 ; 38060890 > v_or_b32_e32 v17, 28, v4 ; 3822089C > v_or_b32_e32 v18, 20, v4 ; 38240894 > v_or_b32_e32 v19, 24, v4 ; 38260898 > v_or_b32_e32 v1, 8, v1 ; 38020288 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v21, 4, v4 ; 382A0884 > v_or_b32_e32 v4, 8, v4 ; 38080888 > buffer_load_dword v1, v1, s[20:23], 0 offen ; E0301000 80050101 > buffer_load_dword v18, v18, s[20:23], 0 offen ; E0301000 80051212 > buffer_load_dword v3, v3, s[20:23], 0 offen ; E0301000 80050303 > buffer_load_dword v17, v17, s[20:23], 0 offen ; E0301000 80051111 > buffer_load_dword v19, v19, s[20:23], 0 offen ; E0301000 80051313 > v_or_b32_e32 v27, 8, v0 ; 38360088 > buffer_load_dword v4, v4, s[20:23], 0 offen ; E0301000 80050404 > buffer_load_dword v27, v27, s[20:23], 0 offen ; E0301000 80051B1B > v_or_b32_e32 v22, 16, v0 ; 382C0090 > v_or_b32_e32 v23, 28, v0 ; 382E009C > v_or_b32_e32 v24, 24, v0 ; 38300098 > v_or_b32_e32 v25, 20, v0 ; 38320094 > buffer_load_dword v21, v21, s[20:23], 0 offen ; E0301000 80051515 > buffer_load_dword v25, v25, s[20:23], 0 offen ; E0301000 80051919 > buffer_load_dword v20, v20, s[20:23], 0 offen ; E0301000 80051414 > buffer_load_dword v22, v22, s[20:23], 0 offen ; E0301000 80051616 > buffer_load_dword v23, v23, s[20:23], 0 offen ; E0301000 80051717 > buffer_load_dword v24, v24, s[20:23], 0 offen ; E0301000 80051818 > v_or_b32_e32 v26, 4, v0 ; 38340084 > buffer_load_dword v0, v0, s[20:23], 0 offen ; E0301000 80050000 > buffer_load_dword v26, v26, s[20:23], 0 offen ; E0301000 80051A1A > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v16, v16, v11 ; 10201710 > v_mul_f32_e32 v32, v13, v13 ; 10401B0D > v_mac_f32_e32 v16, 
v15, v10 ; 3E20150F > v_mul_f32_e32 v15, v6, v5 ; 101E0B06 > v_mul_f32_e32 v28, v6, v14 ; 10381D06 > v_mul_f32_e32 v6, v6, v13 ; 100C1B06 > v_fma_f32 v29, v5, v13, v28 ; D296001D 04721B05 > v_fma_f32 v28, v5, v13, -v28 ; D296001C 84721B05 > v_fma_f32 v30, v5, v14, -v6 ; D296001E 841A1D05 > v_mac_f32_e32 v6, v5, v14 ; 3E0C1D05 > v_mul_f32_e32 v5, v5, v5 ; 100A0B05 > v_fma_f32 v31, v13, v14, -v15 ; D296001F 843E1D0D > v_fma_f32 v15, v13, v14, v15 ; D296000F 043E1D0D > v_mac_f32_e32 v32, v14, v14 ; 3E401D0E > v_mad_f32 v14, v14, v14, v5 ; D282000E 04161D0E > v_mac_f32_e32 v5, v13, v13 ; 3E0A1B0D > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v1, v1, v10 ; 10021501 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v13, v17, v3 ; 101A0711 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mul_f32_e32 v33, v17, v19 ; 10422711 > v_mul_f32_e32 v17, v17, v18 ; 10222511 > v_mul_f32_e32 v37, v18, v18 ; 104A2512 > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v1, v4, v11 ; 3E021704 > v_fma_f32 v34, v3, v18, v33 ; D2960022 04862503 > v_fma_f32 v35, v3, v19, -v17 ; D2960023 84462703 > v_fma_f32 v33, v3, v18, -v33 ; D2960021 84862503 > v_mac_f32_e32 v17, v3, v19 ; 3E222703 > v_mul_f32_e32 v3, v3, v3 ; 10060703 > v_fma_f32 v36, v18, v19, -v13 ; D2960024 84362712 > v_fma_f32 v13, v18, v19, v13 ; D296000D 04362712 > v_mac_f32_e32 v37, v19, v19 ; 3E4A2713 > v_mad_f32 v19, v19, v19, v3 ; D2820013 040E2713 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mac_f32_e32 v1, v27, v12 ; 3E02191B > v_mul_f32_e32 v27, v11, v17 ; 1036230B > v_mac_f32_e32 v3, v18, v18 ; 3E062512 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mul_f32_e32 v18, v21, v11 ; 10241715 > v_mac_f32_e32 v27, v11, v17 ; 3E36230B > v_fma_f32 v17, -v19, 2.0, 1.0 ; D2960011 23C9E913 > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v18, v20, v10 ; 3E241514 > v_fma_f32 v5, -v5, 2.0, 1.0 ; D2960005 23C9E905 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mul_f32_e32 v20, v23, v22 ; 10282D17 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v21, v23, v24 ; 102A3117 > v_mul_f32_e32 v23, v23, v25 ; 102E3317 > v_fma_f32 v14, -v14, 2.0, 1.0 ; D296000E 23C9E90E > v_mul_f32_e32 v17, v17, v11 ; 10221711 > v_mul_f32_e32 v41, v25, v25 ; 10523319 > v_fma_f32 v38, v22, v25, v21 ; D2960026 04563316 > v_fma_f32 v39, v22, v24, -v23 ; D2960027 845E3116 > v_mac_f32_e32 v17, v14, v10 ; 3E22150E > v_fma_f32 v14, -v37, 2.0, 1.0 ; D296000E 23C9E925 > v_fma_f32 v21, v22, v25, -v21 ; D2960015 84563316 > v_mac_f32_e32 v23, v22, v24 ; 3E2E3116 > v_mul_f32_e32 v22, v22, v22 ; 102C2D16 > v_fma_f32 v3, -v3, 2.0, 1.0 ; D2960003 23C9E903 > v_mul_f32_e32 v5, v5, v10 ; 100A1505 > v_fma_f32 v40, v25, v24, -v20 ; D2960028 84523119 > v_fma_f32 v20, v25, v24, v20 ; D2960014 04523119 > v_mac_f32_e32 v41, v24, v24 ; 3E523118 > v_mad_f32 v24, v24, v24, v22 ; D2820018 045A3118 > v_mac_f32_e32 v5, v3, v11 ; 3E0A1703 > v_fma_f32 v3, -v32, 2.0, 1.0 ; D2960003 23C9E920 > v_mul_f32_e32 v14, v14, v11 ; 101C170E > v_mac_f32_e32 v22, v25, v25 ; 3E2C3319 > v_mac_f32_e32 v14, v3, v10 ; 3E1C1503 > v_fma_f32 v3, -v24, 2.0, 1.0 ; D2960003 23C9E918 > v_mul_f32_e32 v4, v11, v34 ; 1008450B > v_mac_f32_e32 v17, v3, v12 ; 3E221903 > v_fma_f32 v3, -v22, 2.0, 1.0 ; D2960003 23C9E916 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v16, v0, v12 ; 3E201900 > v_mul_f32_e32 v0, v11, v36 ; 1000490B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v18, v26, v12 ; 3E24191A > v_mul_f32_e32 v26, v10, v15 ; 10341F0A > v_mac_f32_e32 v5, v3, v12 ; 3E0A1903 > v_fma_f32 v3, -v41, 2.0, 1.0 ; D2960003 23C9E929 > v_mul_f32_e32 v19, v10, v29 ; 10263B0A > v_mac_f32_e32 v4, v11, v34 ; 
3E08450B > v_mac_f32_e32 v26, v10, v15 ; 3E341F0A > v_mul_f32_e32 v15, v11, v33 ; 101E430B > v_mac_f32_e32 v14, v3, v12 ; 3E1C1903 > v_mac_f32_e32 v0, v11, v36 ; 3E00490B > v_mul_f32_e32 v3, v10, v31 ; 10063F0A > v_mul_f32_e32 v24, v12, v38 ; 10304D0C > v_mac_f32_e32 v4, 2.0, v19 ; 3E0826F4 > v_mul_f32_e32 v25, v10, v30 ; 10323D0A > v_mac_f32_e32 v0, 2.0, v3 ; 3E0006F4 > v_mul_f32_e32 v22, v12, v40 ; 102C510C > v_mac_f32_e32 v15, v11, v33 ; 3E1E430B > v_mul_f32_e32 v28, v10, v28 ; 1038390A > v_mac_f32_e32 v4, 2.0, v24 ; 3E0830F4 > v_mul_f32_e32 v3, v8, v17 ; 10062308 > v_mul_f32_e32 v13, v11, v13 ; 101A1B0B > v_mul_f32_e32 v6, v10, v6 ; 100C0D0A > v_mac_f32_e32 v25, v10, v30 ; 3E323D0A > v_mul_f32_e32 v10, v12, v21 ; 10142B0C > v_mac_f32_e32 v15, 2.0, v28 ; 3E1E38F4 > v_mac_f32_e32 v0, 2.0, v22 ; 3E002CF4 > v_mac_f32_e32 v3, v7, v4 ; 3E060907 > v_mac_f32_e32 v3, v9, v0 ; 3E060109 > v_mul_f32_e32 v11, v11, v35 ; 1016470B > v_mul_f32_e32 v20, v12, v20 ; 1028290C > v_mac_f32_e32 v26, 2.0, v13 ; 3E341AF4 > v_mac_f32_e32 v15, 2.0, v10 ; 3E1E14F4 > v_mul_f32_e32 v29, v12, v39 ; 103A4F0C > v_mac_f32_e32 v25, 2.0, v11 ; 3E3216F4 > v_mac_f32_e32 v26, 2.0, v20 ; 3E3428F4 > v_add_f32_e32 v0, v18, v3 ; 06000712 > v_mul_f32_e32 v3, v8, v15 ; 10061F08 > v_mac_f32_e32 v27, 2.0, v6 ; 3E360CF4 > v_mul_f32_e32 v12, v12, v23 ; 10182F0C > v_mac_f32_e32 v25, 2.0, v29 ; 3E323AF4 > v_mul_f32_e32 v4, v8, v26 ; 10083508 > v_mac_f32_e32 v27, 2.0, v12 ; 3E3618F4 > v_mac_f32_e32 v3, v7, v14 ; 3E061D07 > v_mac_f32_e32 v4, v7, v25 ; 3E083307 > v_mac_f32_e32 v3, v9, v27 ; 3E063709 > v_mac_f32_e32 v4, v9, v5 ; 3E080B09 > v_add_f32_e32 v3, v16, v3 ; 06060710 > v_mul_f32_e32 v6, s15, v0 ; 100C000F > v_mul_f32_e32 v5, s9, v0 ; 100A0009 > v_add_f32_e32 v1, v1, v4 ; 06020901 > v_mul_f32_e32 v4, s5, v0 ; 10080005 > v_mac_f32_e32 v6, s7, v3 ; 3E0C0607 > v_mul_f32_e32 v0, s18, v0 ; 10000012 > v_mac_f32_e32 v4, s1, v3 ; 3E080601 > v_mac_f32_e32 v5, s3, v3 ; 3E0A0603 > v_mac_f32_e32 v6, s12, v1 ; 3E0C020C > v_mac_f32_e32 v0, s13, v3 ; 3E00060D > v_mac_f32_e32 v4, s2, v1 ; 3E080202 > v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 > v_mac_f32_e32 v0, s16, v1 ; 3E000210 > v_add_f32_e32 v6, s14, v6 ; 060C0C0E > v_add_f32_e32 v4, s4, v4 ; 06080804 > v_add_f32_e32 v5, s8, v5 ; 060A0A08 > v_add_f32_e32 v0, s17, v0 ; 06000011 > v_min_f32_e32 v1, s0, v6 ; 1E020C00 > exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 44 >Code Size: 1136 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x60 ; C2020160 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > 
s_buffer_load_dword s7, s[0:3], 0x61 ; C2038161 > s_buffer_load_dword s8, s[0:3], 0x56 ; C2040156 > s_buffer_load_dword s0, s[0:3], 0x57 ; C2000157 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mov_b32_e32 v5, s5 ; 7E0A0205 > v_mov_b32_e32 v6, s6 ; 7E0C0206 > v_fma_f32 v7, v3, s4, s4 ; D2960007 00100903 > v_fma_f32 v8, v4, -s7, s7 ; D2960008 401C0F04 > v_fma_f32 v5, s8, v7, v5 ; D2960005 04160E08 > v_fma_f32 v6, s0, v8, v6 ; D2960006 041A1000 > exp 15, 32, 0, 0, 0, v7, v8, v1, v0 ; F800020F 00010807 > exp 15, 33, 0, 0, 0, v5, v6, v0, v0 ; F800021F 00000605 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 12 >Code Size: 132 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > v_rcp_f32_e32 v9, v4 ; 7E125504 > s_load_dwordx8 s[28:35], s[4:5], 0x10 ; C0CE0510 > s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v2, v2, 1, 1, [m0] ; C8080502 > v_cmp_lt_f32_e64 s[0:1], 0, v1 ; D0020000 00020280 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_cndmask_b32_e64 v6, v1, 1.0, s[0:1] ; D2000006 0001E501 > v_interp_p2_f32 v2, [v2], v3, 1, 1, [m0] ; C8090503 > v_cndmask_b32_e64 v3, v0, 1.0, vcc ; D2000003 01A9E500 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_cmp_le_f32_e32 vcc, 0, v3 ; 7C060680 > v_cmp_le_f32_e64 s[0:1], 0, v6 ; D0060000 00020C80 > v_mul_f32_e32 v3, v7, v3 ; 10060707 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_cndmask_b32_e32 v3, v8, v3 ; 00060708 > v_cndmask_b32_e64 v6, v8, v6, s[0:1] ; D2000006 00020D08 > v_mul_f32_e32 v0, v9, v0 ; 10000109 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_mul_f32_e32 v1, v9, v1 ; 10020309 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s23 ; 87181718 > v_cndmask_b32_e32 v3, v0, v3 ; 00060700 > v_cndmask_b32_e32 v4, v1, v6 ; 00080D01 > s_and_b32 s36, s36, s35 ; 87242324 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > image_sample v0, v[3:4], s[16:23], s[24:27] dmask:0x1 ; F0800100 00C40003 > image_sample v1, v[3:4], s[28:35], s[36:39] dmask:0x8 ; F0800800 01270103 > v_cndmask_b32_e64 v3, v5, 1.0, s[0:1] ; D2000003 0001E505 > v_cmp_lt_f32_e64 s[0:1], 0, v2 ; D0020000 00020480 > v_cndmask_b32_e64 v4, v2, 1.0, s[0:1] ; D2000004 0001E502 > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_mul_f32_e32 v3, v7, v3 ; 10060707 > v_cndmask_b32_e64 v3, v8, v3, s[0:1] ; D2000003 00020708 > v_cmp_le_f32_e64 s[0:1], 0, v4 ; D0060000 00020880 > v_mul_f32_e32 v4, v7, v4 ; 10080907 > v_cndmask_b32_e64 v4, v8, v4, s[0:1] ; D2000004 00020908 > s_buffer_load_dword s0, s[12:15], 0xf8 ; C2000DF8 > s_buffer_load_dword s2, s[12:15], 0xd8 ; C2010DD8 > s_buffer_load_dword s1, s[12:15], 0xf9 ; C2008DF9 > s_buffer_load_dword s6, s[12:15], 0xfa ; C2030DFA > v_mul_f32_e32 v2, v9, v2 ; 10040509 > 
v_mul_f32_e32 v5, v9, v5 ; 100A0B09 > s_buffer_load_dword s3, s[12:15], 0xd9 ; C2018DD9 > v_cndmask_b32_e32 v3, v5, v3 ; 00060705 > v_cndmask_b32_e32 v2, v2, v4 ; 00040902 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_le_f32_e32 vcc, 0.5, v1 ; 7C0602F0 > s_buffer_load_dword s7, s[12:15], 0xda ; C2038DDA > v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 > v_mul_f32_e32 v3, v0, v3 ; 10060700 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v4, v1, s0, v3 ; D2960004 040C0101 > v_fma_f32 v6, v1, s6, v0 ; D2960006 04000D01 > v_fma_f32 v5, v1, s1, v2 ; D2960005 04080301 > v_mul_f32_e32 v0, s2, v4 ; 10000802 > s_buffer_load_dword s6, s[12:15], 0xd4 ; C2030DD4 > s_buffer_load_dword s1, s[12:15], 0xd5 ; C2008DD5 > v_mul_f32_e32 v1, s3, v5 ; 10020A03 > v_mul_f32_e32 v2, v0, v0 ; 10040100 > s_buffer_load_dword s0, s[12:15], 0xd6 ; C2000DD6 > v_mul_f32_e32 v3, s7, v6 ; 10060C07 > v_mac_f32_e32 v2, v1, v1 ; 3E040301 > v_mac_f32_e32 v2, v3, v3 ; 3E040703 > v_cmp_le_f32_e32 vcc, 0, v2 ; 7C060480 > s_buffer_load_dword s8, s[12:15], 0xb1 ; C2040DB1 > v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_le_f32_e32 vcc, s6, v2 ; 7C060406 > v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480 > v_cmp_le_f32_e32 vcc, s1, v2 ; 7C060401 > v_cndmask_b32_e64 v3, 0, 1.0, vcc ; D2000003 01A9E480 > v_add_f32_e32 v0, v8, v7 ; 06000F08 > v_cmp_le_f32_e32 vcc, s0, v2 ; 7C060400 > v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 > v_add_f32_e32 v0, v3, v0 ; 06000103 > v_add_f32_e32 v0, v1, v0 ; 06000101 > v_cvt_i32_f32_e32 v10, v0 ; 7E141100 > v_cvt_i32_f32_e32 v9, s8 ; 7E121008 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_cmp_ge_i32_e32 vcc, v9, v10 ; 7D0C1509 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > s_cbranch_execz BB0_2 ; BF880000 > s_buffer_load_dword s37, s[12:15], 0xc5 ; C2128DC5 > s_buffer_load_dword s45, s[12:15], 0xc4 ; C2168DC4 > s_buffer_load_dword s42, s[12:15], 0xc1 ; C2150DC1 > s_buffer_load_dword s38, s[12:15], 0xc6 ; C2130DC6 > s_buffer_load_dword s11, s[12:15], 0xb9 ; C2058DB9 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v11, s37, v5 ; 10160A25 > s_buffer_load_dword s37, s[12:15], 0xc0 ; C2128DC0 > s_buffer_load_dword s9, s[12:15], 0xb8 ; C2048DB8 > s_buffer_load_dword s39, s[12:15], 0xc7 ; C2138DC7 > s_buffer_load_dword s16, s[12:15], 0xba ; C2080DBA > v_mac_f32_e32 v11, s45, v4 ; 3E16082D > v_mac_f32_e32 v11, s38, v6 ; 3E160C26 > v_mul_f32_e32 v12, s42, v5 ; 10180A2A > s_buffer_load_dword s38, s[12:15], 0xbd ; C2130DBD > v_mul_f32_e32 v10, s11, v5 ; 10140A0B > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v12, s37, v4 ; 3E180825 > s_buffer_load_dword s37, s[12:15], 0xbc ; C2128DBC > v_mac_f32_e32 v10, s9, v4 ; 3E140809 > v_add_f32_e32 v11, s39, v11 ; 06161627 > s_buffer_load_dword s43, s[12:15], 0xc2 ; C2158DC2 > s_buffer_load_dword s39, s[12:15], 0xbe ; C2138DBE > v_mac_f32_e32 v10, s16, v6 ; 3E140C10 > s_buffer_load_dword s46, s[12:15], 0xbb ; C2170DBB > s_buffer_load_dword s7, s[12:15], 0xb6 ; C2038DB6 > s_buffer_load_dword s8, s[12:15], 0xb7 ; C2040DB7 > s_buffer_load_dword s17, s[12:15], 0xf5 ; C2088DF5 > s_buffer_load_dword s18, s[12:15], 0xf6 ; C2090DF6 > s_buffer_load_dword s19, s[12:15], 0xf7 ; C2098DF7 > s_buffer_load_dword s9, s[12:15], 0xe8 ; C2048DE8 > s_buffer_load_dword s11, s[12:15], 0xe9 ; C2058DE9 > s_buffer_load_dword s16, s[12:15], 0xea ; C2080DEA > s_buffer_load_dword s20, s[12:15], 0xeb ; C20A0DEB > s_buffer_load_dword s21, s[12:15], 0xf4 ; C20A8DF4 
> s_buffer_load_dword s24, s[12:15], 0xe3 ; C20C0DE3 > s_buffer_load_dword s25, s[12:15], 0xe4 ; C20C8DE4 > s_buffer_load_dword s26, s[12:15], 0xe5 ; C20D0DE5 > s_buffer_load_dword s27, s[12:15], 0xe6 ; C20D8DE6 > s_buffer_load_dword s28, s[12:15], 0xe7 ; C20E0DE7 > s_buffer_load_dword s22, s[12:15], 0xde ; C20B0DDE > s_buffer_load_dword s29, s[12:15], 0xdf ; C20E8DDF > s_buffer_load_dword s30, s[12:15], 0xe0 ; C20F0DE0 > s_buffer_load_dword s31, s[12:15], 0xe1 ; C20F8DE1 > s_buffer_load_dword s32, s[12:15], 0xe2 ; C2100DE2 > s_buffer_load_dword s33, s[12:15], 0xce ; C2108DCE > s_buffer_load_dword s34, s[12:15], 0xcf ; C2110DCF > s_buffer_load_dword s23, s[12:15], 0xd7 ; C20B8DD7 > s_buffer_load_dword s35, s[12:15], 0xdc ; C2118DDC > s_buffer_load_dword s36, s[12:15], 0xdd ; C2120DDD > s_buffer_load_dword s40, s[12:15], 0xcc ; C2140DCC > s_buffer_load_dword s41, s[12:15], 0xcd ; C2148DCD > s_buffer_load_dword s44, s[12:15], 0xc3 ; C2160DC3 > s_buffer_load_dword s12, s[12:15], 0xbf ; C2060DBF > v_mul_f32_e32 v5, s38, v5 ; 100A0A26 > v_cmp_gt_f32_e32 vcc, s6, v2 ; 7C080406 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v5, s37, v4 ; 3E0A0825 > v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 > v_cmp_gt_f32_e32 vcc, s1, v2 ; 7C080401 > v_mac_f32_e32 v12, s43, v6 ; 3E180C2B > v_mac_f32_e32 v5, s39, v6 ; 3E0A0C27 > v_cndmask_b32_e64 v6, 0, 1.0, vcc ; D2000006 01A9E480 > v_cmp_gt_f32_e32 vcc, s0, v2 ; 7C080400 > v_add_f32_e32 v4, s12, v5 ; 06080A0C > v_mul_f32_e32 v5, v7, v15 ; 100A1F07 > v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 > v_cmp_gt_f32_e32 vcc, s23, v2 ; 7C080417 > v_mul_f32_e32 v6, v8, v6 ; 100C0D08 > v_cndmask_b32_e64 v2, 0, 1.0, vcc ; D2000002 01A9E480 > v_mul_f32_e32 v1, v1, v2 ; 10020501 > v_mul_f32_e32 v2, s36, v6 ; 10040C24 > v_mul_f32_e32 v3, v3, v15 ; 10061F03 > v_mac_f32_e32 v2, s35, v5 ; 3E040A23 > v_mac_f32_e32 v2, s22, v3 ; 3E040616 > v_mul_f32_e32 v7, s11, v6 ; 100E0C0B > v_mul_f32_e32 v15, s31, v6 ; 101E0C1F > v_add_f32_e32 v10, s46, v10 ; 0614142E > v_mac_f32_e32 v2, s29, v1 ; 3E04021D > v_mac_f32_e32 v7, s9, v5 ; 3E0E0A09 > v_mac_f32_e32 v15, s30, v5 ; 3E1E0A1E > v_add_f32_e32 v2, v10, v2 ; 0604050A > v_mul_f32_e32 v10, s26, v6 ; 10140C1A > v_mul_f32_e32 v8, s17, v6 ; 10100C11 > v_mac_f32_e32 v7, s16, v3 ; 3E0E0610 > v_mac_f32_e32 v15, s32, v3 ; 3E1E0620 > v_mac_f32_e32 v10, s25, v5 ; 3E140A19 > v_mac_f32_e32 v8, s21, v5 ; 3E100A15 > v_mac_f32_e32 v10, s27, v3 ; 3E14061B > v_mac_f32_e32 v7, s20, v1 ; 3E0E0214 > v_mac_f32_e32 v15, s24, v1 ; 3E1E0218 > v_mac_f32_e32 v8, s18, v3 ; 3E100612 > v_add_f32_e32 v4, v4, v15 ; 06081F04 > v_mul_f32_e32 v2, v2, v7 ; 10040F02 > v_add_f32_e32 v12, s44, v12 ; 0618182C > v_mac_f32_e32 v10, s28, v1 ; 3E14021C > v_mac_f32_e32 v8, s19, v1 ; 3E100213 > v_add_f32_e32 v10, v12, v10 ; 0614150C > v_mul_f32_e32 v4, v4, v7 ; 10080F04 > v_cmp_lt_f32_e64 s[0:1], 0, v2 ; D0020000 00020480 > v_mul_f32_e32 v7, v10, v8 ; 100E110A > v_cndmask_b32_e64 v8, v2, 1.0, s[0:1] ; D2000008 0001E502 > v_cmp_lt_f32_e64 s[0:1], 0, v4 ; D0020000 00020880 > v_cndmask_b32_e64 v10, v4, 1.0, s[0:1] ; D200000A 0001E504 > v_bfrev_b32_e32 v0, 14 ; 7E00708E > v_cmp_lt_f32_e64 s[0:1], 0, v7 ; D0020000 00020E80 > v_rcp_f32_e32 v14, v11 ; 7E1C550B > v_cmp_eq_f32_e32 vcc, 0, v11 ; 7C041680 > v_cndmask_b32_e64 v11, v7, 1.0, s[0:1] ; D200000B 0001E507 > v_cmp_le_f32_e64 s[0:1], 0, v8 ; D0060000 00021080 > v_bfrev_b32_e32 v9, 15 ; 7E12708F > v_mul_f32_e32 v8, v0, v8 ; 10101100 > v_cndmask_b32_e64 v8, v9, v8, s[0:1] ; D2000008 00021109 > v_cmp_le_f32_e64 
s[0:1], 0, v10 ; D0060000 00021480 > v_mul_f32_e32 v10, v0, v10 ; 10141500 > v_cndmask_b32_e64 v10, v9, v10, s[0:1] ; D200000A 00021509 > v_cmp_le_f32_e64 s[0:1], 0, v11 ; D0060000 00021680 > v_mul_f32_e32 v0, v0, v11 ; 10001700 > v_cndmask_b32_e64 v0, v9, v0, s[0:1] ; D2000000 00020109 > v_mul_f32_e32 v9, s41, v6 ; 10120C29 > v_mul_f32_e32 v2, v14, v2 ; 1004050E > v_mac_f32_e32 v9, s40, v5 ; 3E120A28 > s_load_dwordx4 s[12:15], s[4:5], 0x2c ; C086052C > s_load_dwordx8 s[16:23], s[4:5], 0x20 ; C0C80520 > v_mul_f32_e32 v4, v14, v4 ; 1008090E > v_cndmask_b32_e32 v2, v2, v8 ; 00041102 > v_mac_f32_e32 v9, s33, v3 ; 3E120621 > v_cndmask_b32_e32 v4, v4, v10 ; 00081504 > v_mul_f32_e32 v7, v14, v7 ; 100E0F0E > v_mac_f32_e32 v9, s34, v1 ; 3E120222 > v_fma_f32 v2, v2, 0.5, 0.5 ; D2960002 03C1E102 > v_cndmask_b32_e32 v0, v7, v0 ; 00000107 > v_fma_f32 v4, v4, -0.5, 0.5 ; D2960004 03C1E304 > v_fma_f32 v7, s7, -0.5, v2 ; D2960007 0409E207 > v_mov_b32_e32 v17, v9 ; 7E220309 > v_sub_f32_e32 v6, 1.0, v0 ; 080C00F2 > v_fma_f32 v8, s8, -0.5, v4 ; D2960008 0411E208 > v_fma_f32 v0, s7, 0.5, v2 ; D2960000 0409E007 > v_mov_b32_e32 v15, v7 ; 7E1E0307 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s23 ; 870C170C > v_mov_b32_e32 v16, v8 ; 7E200308 > v_mov_b32_e32 v14, v6 ; 7E1C0306 > v_mov_b32_e32 v15, v0 ; 7E1E0300 > image_sample_c v1, v[6:9], s[16:23], s[12:15] dmask:0x1 da ; F0A04100 00640106 > image_sample_c v0, v[14:17], s[16:23], s[12:15] dmask:0x1 da ; F0A04100 0064000E > v_fma_f32 v8, s8, 0.5, v4 ; D2960008 0411E008 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v0, v1, v0 ; 06000101 > image_sample_c v2, v[6:9], s[16:23], s[12:15] dmask:0x1 da ; F0A04100 00640206 > v_mov_b32_e32 v16, v8 ; 7E200308 > v_mov_b32_e32 v17, v9 ; 7E220309 > image_sample_c v3, v[14:17], s[16:23], s[12:15] dmask:0x1 da ; F0A04100 0064030E > s_waitcnt vmcnt(1) ; BF8C0F71 > v_add_f32_e32 v0, v0, v2 ; 06000500 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v0, v0, v3 ; 06000700 > v_mov_b32_e32 v1, 0x3e800000 ; 7E0202FF 3E800000 > v_fma_f32 v0, -v0, v1, 1.0 ; D2960000 23CA0300 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > v_mov_b32_e32 v3, 0 ; 7E060280 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 20 >Code Size: 1268 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s8, s[0:3], 0x54 ; C2040154 > s_buffer_load_dword s7, s[0:3], 0x2f ; C203812F > s_buffer_load_dword s9, s[0:3], 0x55 ; C2048155 > s_buffer_load_dword s10, s[0:3], 0x56 ; C2050156 > s_buffer_load_dword s5, s[0:3], 0x2d ; C202812D > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s8 ; 7E000208 > s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C > v_mov_b32_e32 v1, s9 ; 7E020209 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, s7, v0 ; 100C0007 > s_buffer_load_dword s6, s[0:3], 0x2e ; C203012E > s_buffer_load_dword s32, s[0:3], 0xd9 ; C21001D9 > 
v_mul_f32_e32 v0, s10, v0 ; 1000000A > v_mul_f32_e32 v7, s7, v1 ; 100E0207 > v_mul_f32_e32 v1, s10, v1 ; 1002020A > s_buffer_load_dword s28, s[0:3], 0xd5 ; C20E01D5 > s_buffer_load_dword s31, s[0:3], 0xd8 ; C20F81D8 > s_buffer_load_dword s36, s[0:3], 0xdd ; C21201DD > s_buffer_load_dword s27, s[0:3], 0xd4 ; C20D81D4 > s_buffer_load_dword s33, s[0:3], 0xda ; C21081DA > s_buffer_load_dword s35, s[0:3], 0xdc ; C21181DC > s_buffer_load_dword s29, s[0:3], 0xd6 ; C20E81D6 > s_buffer_load_dword s34, s[0:3], 0xdb ; C21101DB > s_buffer_load_dword s37, s[0:3], 0xde ; C21281DE > s_buffer_load_dword s12, s[0:3], 0xb9 ; C20601B9 > s_buffer_load_dword s16, s[0:3], 0xbd ; C20801BD > s_buffer_load_dword s20, s[0:3], 0xc1 ; C20A01C1 > s_buffer_load_dword s24, s[0:3], 0xc5 ; C20C01C5 > s_buffer_load_dword s30, s[0:3], 0xd7 ; C20F01D7 > s_buffer_load_dword s11, s[0:3], 0xb8 ; C20581B8 > s_buffer_load_dword s13, s[0:3], 0xba ; C20681BA > s_buffer_load_dword s14, s[0:3], 0xbb ; C20701BB > s_buffer_load_dword s15, s[0:3], 0xbc ; C20781BC > s_buffer_load_dword s17, s[0:3], 0xbe ; C20881BE > s_buffer_load_dword s18, s[0:3], 0xbf ; C20901BF > s_buffer_load_dword s19, s[0:3], 0xc0 ; C20981C0 > s_buffer_load_dword s21, s[0:3], 0xc2 ; C20A81C2 > s_buffer_load_dword s22, s[0:3], 0xc3 ; C20B01C3 > s_buffer_load_dword s23, s[0:3], 0xc4 ; C20B81C4 > s_buffer_load_dword s25, s[0:3], 0xc6 ; C20C81C6 > s_buffer_load_dword s26, s[0:3], 0xc7 ; C20D01C7 > s_buffer_load_dword s0, s[0:3], 0xdf ; C20001DF > v_add_f32_e32 v10, 0, v3 ; 06140680 > v_mul_f32_e32 v9, s10, v5 ; 10120A0A > v_mul_f32_e32 v6, v10, v6 ; 100C0D0A > v_add_f32_e32 v10, 0, v4 ; 06140880 > v_mul_f32_e32 v0, v3, v0 ; 10000103 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_cmp_gt_f32_e32 vcc, -0.5, v5 ; 7C080AF1 > v_mul_f32_e32 v7, v10, v7 ; 100E0F0A > v_mad_f32 v8, v5, s7, s7 ; D2820008 001C0F05 > v_xor_b32_e32 v9, 0x80000000, v9 ; 3A1212FF 80000000 > v_cndmask_b32_e32 v0, v6, v0 ; 00000106 > v_cndmask_b32_e32 v1, v7, v1 ; 00020307 > v_cndmask_b32_e32 v6, v8, v9 ; 000C1308 > v_mul_f32_e32 v4, s5, v4 ; 10080805 > v_cmp_lt_f32_e32 vcc, 0.5, v5 ; 7C020AF0 > v_cndmask_b32_e32 v1, v1, v4 ; 00020901 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s4, v3 ; 10060604 > v_mul_f32_e32 v7, s6, v5 ; 100E0A06 > v_cndmask_b32_e32 v0, v0, v3 ; 00000700 > v_mul_f32_e32 v5, s32, v1 ; 100A0220 > v_mul_f32_e32 v4, s28, v1 ; 1008021C > v_mul_f32_e32 v1, s36, v1 ; 10020224 > v_cndmask_b32_e32 v3, v6, v7 ; 00060F06 > v_mac_f32_e32 v5, s31, v0 ; 3E0A001F > v_mac_f32_e32 v4, s27, v0 ; 3E08001B > v_mac_f32_e32 v1, s35, v0 ; 3E020023 > v_mac_f32_e32 v5, s33, v3 ; 3E0A0621 > v_mac_f32_e32 v1, s37, v3 ; 3E020625 > v_mac_f32_e32 v4, s29, v3 ; 3E08061D > v_add_f32_e32 v3, s34, v5 ; 06060A22 > v_add_f32_e32 v0, s30, v4 ; 0600081E > v_mul_f32_e32 v4, s12, v3 ; 1008060C > v_mul_f32_e32 v5, s16, v3 ; 100A0610 > v_mul_f32_e32 v6, s20, v3 ; 100C0614 > v_mul_f32_e32 v3, s24, v3 ; 10060618 > v_add_f32_e32 v1, s0, v1 ; 06020200 > v_mac_f32_e32 v4, s11, v0 ; 3E08000B > v_mac_f32_e32 v5, s15, v0 ; 3E0A000F > v_mac_f32_e32 v6, s19, v0 ; 3E0C0013 > v_mac_f32_e32 v3, s23, v0 ; 3E060017 > v_mac_f32_e32 v3, s25, v1 ; 3E060219 > v_mac_f32_e32 v4, s13, v1 ; 3E08020D > v_mac_f32_e32 v5, s17, v1 ; 3E0A0211 > v_mac_f32_e32 v6, s21, v1 ; 3E0C0215 > v_add_f32_e32 v0, s14, v4 ; 0600080E > v_add_f32_e32 v1, s18, v5 ; 06020A12 > v_add_f32_e32 v4, s22, v6 ; 06080C16 > v_add_f32_e32 v3, s26, v3 ; 0606061A > exp 15, 12, 0, 1, 0, v0, v1, v4, v3 ; F80008CF 03040100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog 
disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 40 >VGPRS: 12 >Code Size: 416 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s12, s[4:7], 0x54 ; C2060554 > s_buffer_load_dword s11, s[4:7], 0x2f ; C205852F > s_buffer_load_dword s13, s[4:7], 0x55 ; C2068555 > s_buffer_load_dword s14, s[4:7], 0x56 ; C2070556 > s_buffer_load_dword s9, s[4:7], 0x2d ; C204852D > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s12 ; 7E00020C > s_buffer_load_dword s8, s[4:7], 0x2c ; C204052C > v_mov_b32_e32 v1, s13 ; 7E02020D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, s11, v0 ; 100C000B > v_mul_f32_e32 v0, s14, v0 ; 1000000E > s_buffer_load_dword s10, s[4:7], 0x2e ; C205052E > s_buffer_load_dword s36, s[4:7], 0xd9 ; C21205D9 > v_mul_f32_e32 v7, s11, v1 ; 100E020B > v_mul_f32_e32 v1, s14, v1 ; 1002020E > s_buffer_load_dword s32, s[4:7], 0xd5 ; C21005D5 > s_buffer_load_dword s35, s[4:7], 0xd8 ; C21185D8 > s_buffer_load_dword s40, s[4:7], 0xdd ; C21405DD > s_buffer_load_dword s31, s[4:7], 0xd4 ; C20F85D4 > s_buffer_load_dword s37, s[4:7], 0xda ; C21285DA > s_buffer_load_dword s39, s[4:7], 0xdc ; C21385DC > s_buffer_load_dword s33, s[4:7], 0xd6 ; C21085D6 > s_buffer_load_dword s38, s[4:7], 0xdb ; C21305DB > s_buffer_load_dword s41, s[4:7], 0xde ; C21485DE > s_buffer_load_dword s28, s[4:7], 0xc5 ; C20E05C5 > s_buffer_load_dword s34, s[4:7], 0xd7 ; C21105D7 > s_buffer_load_dword s16, s[4:7], 0xb9 ; C20805B9 > s_buffer_load_dword s20, s[4:7], 0xbd ; C20A05BD > s_buffer_load_dword s27, s[4:7], 0xc4 ; C20D85C4 > s_buffer_load_dword s42, s[4:7], 0xdf ; C21505DF > s_buffer_load_dword s15, s[4:7], 0xb8 ; C20785B8 > s_buffer_load_dword s19, s[4:7], 0xbc ; C20985BC > s_buffer_load_dword s24, s[4:7], 0xc1 ; C20C05C1 > s_buffer_load_dword s29, s[4:7], 0xc6 ; C20E85C6 > s_buffer_load_dword s17, s[4:7], 0xba ; C20885BA > s_buffer_load_dword s21, s[4:7], 0xbe ; C20A85BE > s_buffer_load_dword s23, s[4:7], 0xc0 ; C20B85C0 > s_buffer_load_dword s30, s[4:7], 0xc7 ; C20F05C7 > s_buffer_load_dword s18, s[4:7], 0xbb ; C20905BB > s_buffer_load_dword s22, s[4:7], 0xbf ; C20B05BF > s_buffer_load_dword s25, s[4:7], 0xc2 ; C20C85C2 > s_buffer_load_dword s26, s[4:7], 0xc3 ; C20D05C3 > s_buffer_load_dword s43, s[4:7], 0xe4 ; C21585E4 > s_buffer_load_dword s44, s[4:7], 0xe5 ; C21605E5 > s_buffer_load_dword s4, s[4:7], 0xe6 ; C20205E6 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > s_buffer_load_dword s7, s[0:3], 0x56 ; C2038156 > s_buffer_load_dword s45, s[0:3], 0x57 ; C2168157 > s_buffer_load_dword s46, s[0:3], 0x60 ; C2170160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > v_add_f32_e32 v10, 0, v3 ; 06140680 > v_mul_f32_e32 v6, 
v10, v6 ; 100C0D0A > v_add_f32_e32 v10, 0, v4 ; 06140880 > v_mul_f32_e32 v0, v3, v0 ; 10000103 > v_cmp_gt_f32_e32 vcc, -0.5, v5 ; 7C080AF1 > v_mul_f32_e32 v9, s14, v5 ; 10120A0E > v_cndmask_b32_e32 v0, v6, v0 ; 00000106 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_mul_f32_e32 v7, v10, v7 ; 100E0F0A > v_mad_f32 v8, v5, s11, s11 ; D2820008 002C1705 > v_xor_b32_e32 v6, 0x80000000, v9 ; 3A0C12FF 80000000 > v_cndmask_b32_e32 v1, v7, v1 ; 00020307 > v_cndmask_b32_e32 v6, v8, v6 ; 000C0D08 > v_mul_f32_e32 v4, s9, v4 ; 10080809 > v_cmp_lt_f32_e32 vcc, 0.5, v5 ; 7C020AF0 > v_cndmask_b32_e32 v1, v1, v4 ; 00020901 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s8, v3 ; 10060608 > v_mul_f32_e32 v7, s10, v5 ; 100E0A0A > v_cndmask_b32_e32 v0, v0, v3 ; 00000700 > v_mul_f32_e32 v5, s36, v1 ; 100A0224 > v_mul_f32_e32 v4, s32, v1 ; 10080220 > v_mul_f32_e32 v1, s40, v1 ; 10020228 > v_cndmask_b32_e32 v3, v6, v7 ; 00060F06 > v_mac_f32_e32 v5, s35, v0 ; 3E0A0023 > v_mac_f32_e32 v4, s31, v0 ; 3E08001F > v_mac_f32_e32 v1, s39, v0 ; 3E020027 > v_mac_f32_e32 v5, s37, v3 ; 3E0A0625 > v_mac_f32_e32 v1, s41, v3 ; 3E020629 > v_mac_f32_e32 v4, s33, v3 ; 3E080621 > v_add_f32_e32 v3, s38, v5 ; 06060A26 > v_add_f32_e32 v0, s34, v4 ; 06000822 > v_mul_f32_e32 v7, s28, v3 ; 100E061C > v_mul_f32_e32 v4, s16, v3 ; 10080610 > v_mul_f32_e32 v5, s20, v3 ; 100A0614 > v_add_f32_e32 v1, s42, v1 ; 0602022A > v_mac_f32_e32 v7, s27, v0 ; 3E0E001B > v_mul_f32_e32 v6, s24, v3 ; 100C0618 > v_mac_f32_e32 v4, s15, v0 ; 3E08000F > v_mac_f32_e32 v5, s19, v0 ; 3E0A0013 > v_mac_f32_e32 v7, s29, v1 ; 3E0E021D > v_add_f32_e32 v7, s30, v7 ; 060E0E1E > v_mac_f32_e32 v6, s23, v0 ; 3E0C0017 > v_mac_f32_e32 v4, s17, v1 ; 3E080211 > v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 > v_mac_f32_e32 v6, s25, v1 ; 3E0C0219 > v_add_f32_e32 v4, s18, v4 ; 06080812 > v_mul_f32_e32 v10, s46, v7 ; 10140E2E > v_add_f32_e32 v5, s22, v5 ; 060A0A16 > v_mul_f32_e32 v11, s0, v7 ; 10160E00 > v_add_f32_e32 v6, s26, v6 ; 060C0C1A > v_mul_f32_e32 v8, s5, v7 ; 10100E05 > v_fma_f32 v10, v4, s46, v10 ; D296000A 04285D04 > v_fma_f32 v11, v5, -s0, v11 ; D296000B 442C0105 > v_mul_f32_e32 v9, s6, v7 ; 10120E06 > v_fma_f32 v8, v10, s7, v8 ; D2960008 04200F0A > v_fma_f32 v9, v11, s45, v9 ; D2960009 04245B0B > exp 15, 32, 0, 0, 0, v10, v11, v6, v7 ; F800020F 07060B0A > v_sub_f32_e32 v0, s43, v0 ; 0800002B > v_sub_f32_e32 v3, s44, v3 ; 0806062C > v_sub_f32_e32 v1, s4, v1 ; 08020204 > exp 15, 33, 0, 0, 0, v8, v9, v6, v7 ; F800021F 07060908 > exp 15, 34, 0, 0, 0, v0, v3, v1, v0 ; F800022F 00010300 > exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 12 >Code Size: 544 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > v_rcp_f32_e32 v7, v4 ; 7E0E5504 > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v6, v2, 1, 1, [m0] ; C8180502 > v_interp_p2_f32 v6, [v6], v3, 1, 1, [m0] ; C8190503 > v_interp_p1_f32 v11, v2, 0, 2, [m0] ; C82C0802 > s_load_dwordx4 s[12:15], 
s[2:3], 0x8 ; C0860308 > v_interp_p2_f32 v11, [v11], v3, 0, 2, [m0] ; C82D0803 > v_interp_p1_f32 v12, v2, 1, 2, [m0] ; C8300902 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_interp_p2_f32 v12, [v12], v3, 1, 2, [m0] ; C8310903 > v_interp_p1_f32 v18, v2, 2, 2, [m0] ; C8480A02 > v_mul_f32_e32 v2, v7, v0 ; 10040107 > v_cndmask_b32_e64 v0, v0, 1.0, vcc ; D2000000 01A9E500 > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > s_load_dwordx8 s[28:35], s[4:5], 0x0 ; C0CE0500 > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > s_load_dwordx8 s[36:43], s[4:5], 0x10 ; C0D20510 > v_bfrev_b32_e32 v14, 14 ; 7E1C708E > v_cndmask_b32_e64 v8, v1, 1.0, vcc ; D2000008 01A9E501 > v_interp_p2_f32 v18, [v18], v3, 2, 2, [m0] ; C8490A03 > v_bfrev_b32_e32 v15, 15 ; 7E1E708F > v_mul_f32_e32 v3, v14, v0 ; 1006010E > v_cmp_le_f32_e32 vcc, 0, v0 ; 7C060080 > v_cndmask_b32_e32 v0, v15, v3 ; 0000070F > v_mul_f32_e32 v3, v14, v8 ; 1006110E > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > v_cndmask_b32_e32 v3, v15, v3 ; 0006070F > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_cndmask_b32_e64 v4, v5, 1.0, s[0:1] ; D2000004 0001E505 > v_cmp_lt_f32_e64 s[0:1], 0, v6 ; D0020000 00020C80 > v_cndmask_b32_e64 v8, v6, 1.0, s[0:1] ; D2000008 0001E506 > v_mul_f32_e32 v1, v7, v1 ; 10020307 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s8, s[12:15], 0xa0 ; C2040DA0 > v_mul_f32_e32 v9, v14, v4 ; 1012090E > v_cmp_le_f32_e64 s[0:1], 0, v4 ; D0060000 00020880 > v_cndmask_b32_e64 v4, v15, v9, s[0:1] ; D2000004 0002130F > s_buffer_load_dword s9, s[12:15], 0xa1 ; C2048DA1 > s_and_b32 s24, s24, s35 ; 87182318 > v_cndmask_b32_e32 v27, v2, v0 ; 00360102 > v_cndmask_b32_e32 v28, v1, v3 ; 00380701 > s_and_b32 s20, s20, s43 ; 87142B14 > v_mul_f32_e32 v9, v14, v8 ; 1012110E > v_cmp_le_f32_e64 s[0:1], 0, v8 ; D0060000 00021080 > image_sample v29, v[27:28], s[28:35], s[24:27] dmask:0x1 ; F0800100 00C71D1B > image_sample v[0:3], v[27:28], s[36:43], s[20:23] dmask:0xf ; F0800F00 00A9001B > v_cndmask_b32_e64 v8, v15, v9, s[0:1] ; D2000008 0002130F > v_mul_f32_e32 v5, v7, v5 ; 100A0B07 > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > s_buffer_load_dword s7, s[12:15], 0x61 ; C2038D61 > v_cndmask_b32_e32 v30, v5, v4 ; 003C0905 > v_cndmask_b32_e32 v31, v6, v8 ; 003E1106 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_le_f32_e32 vcc, 0.5, v3 ; 7C0606F0 > s_buffer_load_dword s11, s[12:15], 0xa2 ; C2058DA2 > s_buffer_load_dword s0, s[12:15], 0x60 ; C2000D60 > v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 > v_mul_f32_e32 v16, v29, v30 ; 10203D1D > s_buffer_load_dword s1, s[12:15], 0x62 ; C2008D62 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v5, v4, s8, v16 ; D2960005 04401104 > v_mul_f32_e32 v17, v29, v31 ; 10223F1D > s_buffer_load_dword s8, s[12:15], 0x63 ; C2040D63 > v_fma_f32 v6, v4, s9, v17 ; D2960006 04441304 > v_mul_f32_e32 v7, s7, v6 ; 100E0C07 > v_fma_f32 v4, v4, s11, v29 ; D2960004 04741704 > v_mac_f32_e32 v7, s0, v5 ; 3E0E0A00 > s_buffer_load_dword s11, s[12:15], 0x65 ; C2058D65 > v_mac_f32_e32 v7, s1, v4 ; 3E0E0801 > s_buffer_load_dword s9, s[12:15], 0x64 ; C2048D64 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e32 v7, s8, v7 ; 060E0E08 > s_buffer_load_dword s8, s[12:15], 0x69 ; C2040D69 > s_buffer_load_dword s0, s[12:15], 0x66 ; C2000D66 > s_buffer_load_dword s7, s[12:15], 0x68 ; C2038D68 > s_buffer_load_dword s1, s[12:15], 0x67 ; C2008D67 > v_mul_f32_e32 v9, s11, v6 ; 10120C0B > v_mac_f32_e32 v9, s9, v5 ; 3E120A09 > s_buffer_load_dword s9, s[12:15], 0x6a ; 
C2048D6A > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v19, s8, v6 ; 10260C08 > v_mac_f32_e32 v9, s0, v4 ; 3E120800 > v_mac_f32_e32 v19, s7, v5 ; 3E260A07 > s_buffer_load_dword s7, s[12:15], 0x6d ; C2038D6D > v_add_f32_e32 v9, s1, v9 ; 06121201 > s_buffer_load_dword s1, s[12:15], 0x6c ; C2008D6C > s_buffer_load_dword s8, s[12:15], 0x6e ; C2040D6E > v_mac_f32_e32 v19, s9, v4 ; 3E260809 > s_buffer_load_dword s0, s[12:15], 0x6b ; C2000D6B > s_buffer_load_dword s9, s[12:15], 0x6f ; C2048D6F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v6, s7, v6 ; 100C0C07 > v_mac_f32_e32 v6, s1, v5 ; 3E0C0A01 > v_mac_f32_e32 v6, s8, v4 ; 3E0C0808 > v_cmp_lt_f32_e32 vcc, 0, v7 ; 7C020E80 > v_cndmask_b32_e64 v8, v7, 1.0, vcc ; D2000008 01A9E507 > v_add_f32_e32 v4, s9, v6 ; 06080C09 > v_cmp_lt_f32_e32 vcc, 0, v9 ; 7C021280 > v_add_f32_e32 v19, s0, v19 ; 06262600 > v_cndmask_b32_e64 v10, v9, 1.0, vcc ; D200000A 01A9E509 > v_rcp_f32_e32 v5, v4 ; 7E0A5504 > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_cndmask_b32_e64 v20, v19, 1.0, vcc ; D2000014 01A9E513 > v_mul_f32_e32 v21, v14, v8 ; 102A110E > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cndmask_b32_e32 v8, v15, v21 ; 00102B0F > v_mul_f32_e32 v21, v14, v10 ; 102A150E > v_cmp_le_f32_e32 vcc, 0, v10 ; 7C061480 > s_buffer_load_dword s0, s[12:15], 0x5e ; C2000D5E > v_cndmask_b32_e32 v10, v15, v21 ; 00142B0F > v_mul_f32_e32 v6, v14, v20 ; 100C290E > v_cmp_le_f32_e32 vcc, 0, v20 ; 7C062880 > v_cndmask_b32_e32 v6, v15, v6 ; 000C0D0F > v_mul_f32_e32 v7, v5, v7 ; 100E0F05 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_cndmask_b32_e32 v4, v7, v8 ; 00081107 > v_fma_f32 v4, v4, 0.5, 0.5 ; D2960004 03C1E104 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v8, s0, 0.5, v4 ; D2960008 0411E000 > v_fma_f32 v20, s0, -0.5, v4 ; D2960014 0411E200 > s_buffer_load_dword s0, s[12:15], 0x74 ; C2000D74 > s_load_dwordx8 s[28:35], s[4:5], 0x20 ; C0CE0520 > s_load_dwordx4 s[36:39], s[4:5], 0x2c ; C092052C > s_buffer_load_dword s1, s[12:15], 0x5f ; C2008D5F > v_mul_f32_e32 v9, v5, v9 ; 10121305 > v_mul_f32_e32 v5, v5, v19 ; 100A2705 > v_cndmask_b32_e32 v7, v9, v10 ; 000E1509 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v22, s0 ; 7E2C0200 > v_mov_b32_e32 v26, v22 ; 7E340316 > v_cndmask_b32_e32 v5, v5, v6 ; 000A0D05 > v_fma_f32 v9, v7, -0.5, 0.5 ; D2960009 03C1E307 > v_fma_f32 v21, s1, -0.5, v9 ; D2960015 0425E201 > v_sub_f32_e32 v19, 1.0, v5 ; 08260AF2 > s_and_b32 s36, s36, s35 ; 87242324 > v_mov_b32_e32 v24, v20 ; 7E300314 > v_mov_b32_e32 v26, s0 ; 7E340200 > s_buffer_load_dword s0, s[12:15], 0x1d ; C2000D1D > v_mov_b32_e32 v25, v21 ; 7E320315 > v_mov_b32_e32 v23, v19 ; 7E2E0313 > image_sample_c v[4:7], v[19:22], s[28:35], s[36:39] dmask:0xf da ; F0A04F00 01270413 > v_mov_b32_e32 v24, v8 ; 7E300308 > v_fma_f32 v21, s1, 0.5, v9 ; D2960015 0425E001 > s_buffer_load_dword s1, s[12:15], 0x1c ; C2008D1C > s_waitcnt vmcnt(0) ; BF8C0F70 > image_sample_c v[5:8], v[23:26], s[28:35], s[36:39] dmask:0xf da ; F0A04F00 01270517 > s_buffer_load_dword s7, s[12:15], 0x1e ; C2038D1E > s_waitcnt vmcnt(0) ; BF8C0F70 > image_sample_c v[6:9], v[19:22], s[28:35], s[36:39] dmask:0xf da ; F0A04F00 01270613 > v_mov_b32_e32 v25, v21 ; 7E320315 > s_waitcnt vmcnt(0) ; BF8C0F70 > image_sample_c v[7:10], v[23:26], s[28:35], s[36:39] dmask:0xf da ; F0A04F00 01270717 > s_buffer_load_dword s8, s[12:15], 0x1f ; C2040D1F > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v8, s0, v17 ; 10102200 > s_buffer_load_dword s0, s[12:15], 0x21 ; C2000D21 > v_mac_f32_e32 v8, s1, v16 ; 3E102001 > s_buffer_load_dword s1, 
s[12:15], 0x20 ; C2008D20 > v_mac_f32_e32 v8, s7, v29 ; 3E103A07 > s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22 > v_add_f32_e32 v8, s8, v8 ; 06101008 > s_buffer_load_dword s8, s[12:15], 0x23 ; C2040D23 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v10, s0, v17 ; 10142200 > v_mac_f32_e32 v10, s1, v16 ; 3E142001 > v_mac_f32_e32 v10, s7, v29 ; 3E143A07 > s_buffer_load_dword s7, s[12:15], 0x29 ; C2038D29 > s_buffer_load_dword s9, s[12:15], 0x28 ; C2048D28 > v_add_f32_e32 v10, s8, v10 ; 06141408 > s_buffer_load_dword s8, s[12:15], 0x2a ; C2040D2A > s_buffer_load_dword s11, s[12:15], 0x2b ; C2058D2B > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v17, s7, v17 ; 10222207 > v_mac_f32_e32 v17, s9, v16 ; 3E222009 > v_cmp_lt_f32_e32 vcc, 0, v8 ; 7C021080 > v_mac_f32_e32 v17, s8, v29 ; 3E223A08 > v_add_f32_e32 v16, s11, v17 ; 0620220B > s_load_dwordx8 s[20:27], s[4:5], 0x30 ; C0CA0530 > s_load_dwordx4 s[28:31], s[4:5], 0x3c ; C08E053C > v_rcp_f32_e32 v17, v16 ; 7E225510 > s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 > v_cndmask_b32_e64 v9, v8, 1.0, vcc ; D2000009 01A9E508 > v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 > v_cndmask_b32_e64 v19, v10, 1.0, vcc ; D2000013 01A9E50A > v_mul_f32_e32 v20, v14, v9 ; 1028130E > v_cmp_le_f32_e32 vcc, 0, v9 ; 7C061280 > v_cndmask_b32_e32 v9, v15, v20 ; 0012290F > v_mul_f32_e32 v14, v14, v19 ; 101C270E > v_cmp_le_f32_e32 vcc, 0, v19 ; 7C062680 > v_cndmask_b32_e32 v14, v15, v14 ; 001C1D0F > v_mul_f32_e32 v8, v17, v8 ; 10101111 > v_cmp_eq_f32_e32 vcc, 0, v16 ; 7C042080 > v_mul_f32_e32 v10, v17, v10 ; 10141511 > v_cndmask_b32_e32 v32, v8, v9 ; 00401308 > v_cndmask_b32_e32 v33, v10, v14 ; 00421D0A > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s28, s28, s27 ; 871C1B1C > v_mov_b32_e32 v34, 0 ; 7E440280 > image_sample_l v[23:26], v[32:35], s[20:27], s[28:31] dmask:0xf ; F0900F00 00E51720 > s_buffer_load_dword s6, s[16:19], 0x10 ; C2031110 > s_buffer_load_dword s3, s[16:19], 0x11 ; C2019111 > s_buffer_load_dword s2, s[16:19], 0x12 ; C2011112 > s_buffer_load_dword s8, s[16:19], 0x14 ; C2041114 > s_buffer_load_dword s9, s[16:19], 0x15 ; C2049115 > s_buffer_load_dword s11, s[16:19], 0x16 ; C2059116 > s_buffer_load_dword s28, s[16:19], 0x18 ; C20E1118 > s_buffer_load_dword s29, s[16:19], 0x19 ; C20E9119 > s_buffer_load_dword s16, s[16:19], 0x1a ; C208111A > s_buffer_load_dword s17, s[12:15], 0x44 ; C2088D44 > s_buffer_load_dword s18, s[12:15], 0x45 ; C2090D45 > s_buffer_load_dword s19, s[12:15], 0x0 ; C2098D00 > s_buffer_load_dword s30, s[12:15], 0x46 ; C20F0D46 > s_load_dwordx8 s[32:39], s[4:5], 0x40 ; C0D00540 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mad_f32 v8, -v30, v29, s17 ; D2820008 20463B1E > v_mad_f32 v9, -v31, v29, s18 ; D2820009 204A3B1F > v_mul_f32_e32 v8, s19, v8 ; 10101013 > v_mul_f32_e32 v10, s6, v8 ; 10141006 > v_mul_f32_e32 v9, s19, v9 ; 10121213 > v_mul_f32_e32 v30, s8, v8 ; 103C1008 > v_mul_f32_e32 v31, s28, v8 ; 103E101C > v_sub_f32_e32 v8, s30, v29 ; 08103A1E > v_mac_f32_e32 v10, s3, v9 ; 3E141203 > v_mul_f32_e32 v8, s19, v8 ; 10101013 > v_mac_f32_e32 v30, s9, v9 ; 3E3C1209 > v_mac_f32_e32 v31, s29, v9 ; 3E3E121D > v_mac_f32_e32 v10, s2, v8 ; 3E141002 > v_mac_f32_e32 v31, s16, v8 ; 3E3E1010 > v_mac_f32_e32 v30, s11, v8 ; 3E3C100B > v_mul_f32_e32 v8, v10, v10 ; 1010150A > s_load_dwordx4 s[40:43], s[4:5], 0x4c ; C094054C > v_mac_f32_e32 v8, v30, v30 ; 3E103D1E > v_mul_f32_e32 v9, v11, v11 ; 1012170B > s_load_dwordx4 s[44:47], s[4:5], 0x5c ; C096055C > s_load_dwordx8 s[48:55], s[4:5], 0x50 ; C0D80550 > v_mac_f32_e32 v9, v12, v12 ; 3E12190C > 
v_mac_f32_e32 v8, v31, v31 ; 3E103F1F > v_rsq_clamp_f32_e32 v29, v8 ; 7E3A5908 > v_mac_f32_e32 v9, v18, v18 ; 3E122512 > v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s40, s40, s39 ; 87282728 > s_and_b32 s44, s44, s55 ; 872C372C > image_sample v[14:17], v[27:28], s[32:39], s[40:43] dmask:0xf ; F0800F00 01480E1B > image_sample v[19:22], v[27:28], s[48:55], s[44:47] dmask:0xf ; F0800F00 016C131B > v_mul_f32_e32 v10, v10, v29 ; 10143B0A > v_mul_f32_e32 v30, v30, v29 ; 103C3B1E > v_mul_f32_e32 v29, v31, v29 ; 103A3B1F > v_fma_f32 v11, v11, v9, v10 ; D296000B 042A130B > v_fma_f32 v12, v12, v9, v30 ; D296000C 047A130C > v_fma_f32 v9, v18, v9, v29 ; D2960009 04761312 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v18, v19, 2.0, -1.0 ; D2960012 03CDE913 > v_mul_f32_e32 v31, v11, v11 ; 103E170B > v_fma_f32 v19, v20, 2.0, -1.0 ; D2960013 03CDE914 > v_mul_f32_e32 v20, v18, v18 ; 10282512 > v_mac_f32_e32 v31, v12, v12 ; 3E3E190C > v_mac_f32_e32 v20, v19, v19 ; 3E282713 > v_fma_f32 v21, v21, 2.0, -1.0 ; D2960015 03CDE915 > v_mac_f32_e32 v20, v21, v21 ; 3E282B15 > v_mac_f32_e32 v31, v9, v9 ; 3E3E1309 > v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F > v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 > s_load_dwordx8 s[20:27], s[4:5], 0x60 ; C0CA0560 > s_load_dwordx4 s[16:19], s[4:5], 0x6c ; C088056C > v_mul_f32_e32 v11, v11, v31 ; 10163F0B > v_mul_f32_e32 v18, v20, v18 ; 10242514 > v_mul_f32_e32 v32, v11, v18 ; 1040250B > v_mul_f32_e32 v19, v20, v19 ; 10262714 > v_mul_f32_e32 v12, v12, v31 ; 10183F0C > v_mul_f32_e32 v33, v11, v10 ; 1042150B > v_mul_f32_e32 v18, v10, v18 ; 1024250A > v_mac_f32_e32 v32, v12, v19 ; 3E40270C > v_mul_f32_e32 v10, v20, v21 ; 10142B14 > v_mul_f32_e32 v9, v9, v31 ; 10123F09 > v_mac_f32_e32 v33, v12, v30 ; 3E423D0C > v_mac_f32_e32 v32, v9, v10 ; 3E401509 > v_mac_f32_e32 v33, v9, v29 ; 3E423B09 > v_add_f32_e64 v9, 0, v32 clamp ; D2060809 00024080 > v_log_f32_e32 v9, v9 ; 7E124F09 > v_mac_f32_e32 v18, v30, v19 ; 3E24271E > v_mac_f32_e32 v18, v29, v10 ; 3E24151D > v_mul_f32_e32 v19, v0, v0 ; 10260100 > s_buffer_load_dword s1, s[12:15], 0x40 ; C2008D40 > s_buffer_load_dword s0, s[12:15], 0x41 ; C2000D41 > s_buffer_load_dword s7, s[12:15], 0x48 ; C2038D48 > v_mov_b32_e32 v0, 0x3b83126f ; 7E0002FF 3B83126F > v_mov_b32_e32 v10, 0x45800000 ; 7E1402FF 45800000 > s_buffer_load_dword s4, s[12:15], 0x42 ; C2020D42 > s_buffer_load_dword s3, s[12:15], 0x49 ; C2018D49 > s_buffer_load_dword s2, s[12:15], 0x4a ; C2010D4A > v_fma_f32 v0, v19, v10, v0 ; D2960000 04021513 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s16, s16, s27 ; 87101B10 > v_mov_b32_e32 v29, v34 ; 7E3A0322 > v_mul_f32_e32 v0, v0, v9 ; 10001300 > image_sample_l v[9:12], v[27:30], s[20:27], s[16:19] dmask:0xf ; F0900F00 0085091B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v11, v18, v17 ; 06162312 > v_exp_f32_e32 v20, v0 ; 7E284B00 > v_add_f32_e64 v0, 0, v33 clamp ; D2060800 00024280 > v_add_f32_e32 v11, -1.0, v11 ; 061616F3 > v_cmp_eq_f32_e32 vcc, 0, v17 ; 7C042280 > s_and_saveexec_b64 s[16:17], vcc ; BE90246A > s_xor_b64 s[16:17], exec, s[16:17] ; 8990107E > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > v_cndmask_b32_e64 v12, v11, 1.0, vcc ; D200000C 01A9E50B > v_cmp_le_f32_e32 vcc, 0, v12 ; 7C061880 > v_mul_f32_e32 v12, 0x70000000, v12 ; 101818FF 70000000 > v_bfrev_b32_e32 v21, 15 ; 7E2A708F > v_cndmask_b32_e32 v12, v21, v12 ; 00181915 > s_or_saveexec_b64 s[16:17], s[16:17] ; BE902510 > s_buffer_load_dword s8, s[12:15], 0x43 ; C2040D43 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_xor_b64 exec, exec, s[16:17] ; 
89FE107E > v_rcp_f32_e32 v12, v17 ; 7E185511 > v_mul_f32_e32 v12, v12, v11 ; 1018170C > s_or_b64 exec, exec, s[16:17] ; 88FE107E > v_add_f32_e64 v27, 0, v12 clamp ; D206081B 00021880 > v_mul_f32_e32 v11, s1, v23 ; 10162E01 > v_mul_f32_e32 v12, s0, v24 ; 10183000 > v_mul_f32_e32 v17, s4, v25 ; 10223204 > v_mul_f32_e32 v23, s7, v26 ; 102E3407 > v_mul_f32_e32 v24, s3, v26 ; 10303403 > v_mul_f32_e32 v25, s2, v26 ; 10323402 > v_mov_b32_e32 v21, 0x40004189 ; 7E2A02FF 40004189 > v_mov_b32_e32 v26, 0x45800000 ; 7E3402FF 45800000 > v_fma_f32 v21, v19, v26, v21 ; D2960015 04563513 > v_mul_f32_e32 v21, 0x3e000000, v21 ; 102A2AFF 3E000000 > v_mul_f32_e32 v21, v20, v21 ; 102A2B14 > v_sub_f32_e32 v20, 1.0, v0 ; 082800F2 > v_mad_f32 v20, -v0, v20, v20 ; D2820014 24522900 > v_mul_f32_e32 v20, v20, v20 ; 10282914 > v_mad_f32 v0, -v0, v20, v20 ; D2820000 24522900 > v_sub_f32_e32 v20, 1.0, v2 ; 082804F2 > v_fma_f32 v0, v20, v0, v2 ; D2960000 040A0114 > v_mul_f32_e32 v2, v9, v23 ; 10042F09 > v_mul_f32_e32 v23, v9, v24 ; 102E3109 > v_mul_f32_e32 v9, v9, v25 ; 10123309 > v_mul_f32_e32 v0, v0, v21 ; 10002B00 > v_mul_f32_e32 v19, v14, v11 ; 1026170E > v_mul_f32_e32 v2, v2, v1 ; 10040302 > v_mul_f32_e32 v23, v23, v1 ; 102E0317 > v_mul_f32_e32 v20, v15, v12 ; 1028190F > v_mul_f32_e32 v1, v9, v1 ; 10020309 > v_mul_f32_e32 v21, v16, v17 ; 102A2310 > v_fma_f32 v2, v2, v0, v19 ; D2960002 044E0102 > v_fma_f32 v9, v23, v0, v20 ; D2960009 04520117 > v_fma_f32 v0, v1, v0, v21 ; D2960000 04560101 > v_mul_f32_e32 v2, v2, v27 ; 10043702 > v_mul_f32_e32 v1, v9, v27 ; 10023709 > v_mul_f32_e32 v0, v0, v27 ; 10003700 > v_cmp_lt_f32_e32 vcc, 0, v3 ; 7C020680 > s_and_saveexec_b64 s[12:13], vcc ; BE8C246A > s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E > s_cbranch_execz BB0_6 ; BF880000 > v_mov_b32_e32 v9, 0x3f028283 ; 7E1202FF 3F028283 > v_cmp_gt_f32_e32 vcc, v9, v22 ; 7C082D09 > v_add_f32_e32 v9, v19, v19 ; 06122713 > v_add_f32_e32 v19, v20, v20 ; 06262914 > v_add_f32_e32 v20, v21, v21 ; 06282B15 > v_max3_f32 v21, v16, v15, v14 ; D2A80015 043A1F10 > v_cmp_eq_f32_e64 s[0:1], 0, v21 ; D0040000 00022A80 > v_rcp_f32_e32 v21, v21 ; 7E2A5515 > v_cmp_lt_f32_e64 s[2:3], 0, v14 ; D0020002 00021C80 > v_cmp_lt_f32_e64 s[4:5], 0, v15 ; D0020004 00021E80 > v_cmp_lt_f32_e64 s[6:7], 0, v16 ; D0020006 00022080 > v_cndmask_b32_e64 v22, v14, 1.0, s[2:3] ; D2000016 0009E50E > v_cndmask_b32_e64 v23, v15, 1.0, s[4:5] ; D2000017 0011E50F > v_cndmask_b32_e64 v24, v16, 1.0, s[6:7] ; D2000018 0019E510 > v_bfrev_b32_e32 v25, 14 ; 7E32708E > v_cmp_le_f32_e64 s[2:3], 0, v22 ; D0060002 00022C80 > v_cmp_le_f32_e64 s[4:5], 0, v23 ; D0060004 00022E80 > v_cmp_le_f32_e64 s[6:7], 0, v24 ; D0060006 00023080 > v_mul_f32_e32 v22, v25, v22 ; 102C2D19 > v_bfrev_b32_e32 v26, 15 ; 7E34708F > v_mul_f32_e32 v23, v25, v23 ; 102E2F19 > v_mul_f32_e32 v24, v25, v24 ; 10303119 > v_cndmask_b32_e64 v22, v26, v22, s[2:3] ; D2000016 000A2D1A > v_mul_f32_e32 v14, v21, v14 ; 101C1D15 > v_cndmask_b32_e64 v23, v26, v23, s[4:5] ; D2000017 00122F1A > v_mul_f32_e32 v15, v21, v15 ; 101E1F15 > v_cndmask_b32_e64 v24, v26, v24, s[6:7] ; D2000018 001A311A > v_mul_f32_e32 v16, v21, v16 ; 10202115 > v_cndmask_b32_e64 v14, v14, v22, s[0:1] ; D200000E 00022D0E > v_cndmask_b32_e64 v15, v15, v23, s[0:1] ; D200000F 00022F0F > v_cndmask_b32_e64 v16, v16, v24, s[0:1] ; D2000010 00023110 > v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 > v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 > v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 > v_mul_f32_e32 v14, v14, v14 ; 101C1D0E > v_add_f32_e64 
v11, 0, v11 clamp ; D206080B 00021680 > v_mul_f32_e32 v15, v15, v15 ; 101E1F0F > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_mul_f32_e32 v16, v16, v16 ; 10202110 > v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 > v_mul_f32_e32 v11, v14, v11 ; 1016170E > v_mul_f32_e32 v12, v15, v12 ; 1018190F > v_mov_b32_e32 v15, 0x3e19999a ; 7E1E02FF 3E19999A > v_mul_f32_e32 v14, v16, v17 ; 101C2310 > v_mul_f32_e32 v11, v15, v11 ; 1016170F > v_mul_f32_e32 v12, v15, v12 ; 1018190F > v_mul_f32_e32 v14, v15, v14 ; 101C1D0F > v_cndmask_b32_e32 v9, v11, v9 ; 0012130B > v_cndmask_b32_e32 v11, v12, v19 ; 0016270C > v_cndmask_b32_e32 v12, v14, v20 ; 0018290E > v_add_f32_e32 v14, -0.5, v3 ; 061C06F1 > v_mov_b32_e32 v17, 0x3e800000 ; 7E2202FF 3E800000 > v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 > v_subrev_f32_e32 v19, v18, v17 ; 0A262312 > v_sub_f32_e32 v20, 1.0, v18 ; 082824F2 > v_min_f32_e32 v3, 0.5, v3 ; 1E0606F0 > v_mul_f32_e32 v15, v9, v14 ; 101E1D09 > v_mul_f32_e32 v16, v11, v14 ; 10201D0B > v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 > v_mul_f32_e32 v14, v12, v14 ; 101C1D0C > v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 > v_mul_f32_e32 v9, v9, v3 ; 10120709 > v_mul_f32_e32 v11, v11, v3 ; 1016070B > v_mul_f32_e32 v3, v12, v3 ; 1006070C > v_add_f32_e32 v12, v17, v18 ; 06182511 > v_fma_f32 v2, v15, v19, v2 ; D2960002 040A270F > v_fma_f32 v1, v16, v19, v1 ; D2960001 04062710 > v_fma_f32 v0, v14, v19, v0 ; D2960000 0402270E > v_mul_f32_e32 v9, v9, v20 ; 10122909 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_mul_f32_e32 v11, v11, v20 ; 1016290B > v_mul_f32_e32 v3, v3, v20 ; 10062903 > v_fma_f32 v2, v9, v12, v2 ; D2960002 040A1909 > v_fma_f32 v1, v11, v12, v1 ; D2960001 0406190B > v_fma_f32 v0, v3, v12, v0 ; D2960000 04021903 > s_or_b64 exec, exec, s[12:13] ; 88FE0C7E > v_add_f32_e32 v3, v5, v4 ; 06060905 > v_add_f32_e32 v3, v3, v6 ; 06060D03 > v_add_f32_e32 v3, v3, v7 ; 06060F03 > v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 > v_fma_f32 v3, -v3, v4, 1.0 ; D2960003 23CA0903 > v_sub_f32_e64 v4, 1.0, s8 ; D2080004 000010F2 > v_fma_f32 v3, s8, v3, v4 ; D2960003 04120608 > v_min_f32_e32 v4, 1.0, v8 ; 1E0810F2 > v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 > v_mad_f32 v4, -v4, v5, v5 ; D2820004 24160B04 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_mul_f32_e32 v2, v2, v3 ; 10040702 > v_mul_f32_e32 v1, v1, v3 ; 10020701 > v_mul_f32_e32 v3, v0, v3 ; 10060700 > v_add_f32_e32 v0, 0x3eaa7efa, v10 ; 060014FF 3EAA7EFA > v_add_f32_e64 v4, 0, v0 clamp ; D2060804 00020080 > v_mul_f32_e32 v0, v2, v4 ; 10000902 > v_mul_f32_e32 v2, v3, v4 ; 10040903 > v_mul_f32_e32 v1, v1, v4 ; 10020901 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 64 >VGPRS: 36 >Code Size: 2296 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 7 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s5, s[0:3], 0x55 ; C2028155 > 
s_buffer_load_dword s4, s[0:3], 0x54 ; C2020154 > s_buffer_load_dword s6, s[0:3], 0x56 ; C2030156 > s_buffer_load_dword s28, s[0:3], 0xd9 ; C20E01D9 > s_buffer_load_dword s24, s[0:3], 0xd5 ; C20C01D5 > s_buffer_load_dword s27, s[0:3], 0xd8 ; C20D81D8 > s_buffer_load_dword s32, s[0:3], 0xdd ; C21001DD > s_buffer_load_dword s23, s[0:3], 0xd4 ; C20B81D4 > s_buffer_load_dword s29, s[0:3], 0xda ; C20E81DA > s_buffer_load_dword s31, s[0:3], 0xdc ; C20F81DC > s_buffer_load_dword s25, s[0:3], 0xd6 ; C20C81D6 > s_buffer_load_dword s30, s[0:3], 0xdb ; C20F01DB > s_buffer_load_dword s33, s[0:3], 0xde ; C21081DE > s_buffer_load_dword s8, s[0:3], 0xb9 ; C20401B9 > s_buffer_load_dword s12, s[0:3], 0xbd ; C20601BD > s_buffer_load_dword s16, s[0:3], 0xc1 ; C20801C1 > s_buffer_load_dword s20, s[0:3], 0xc5 ; C20A01C5 > s_buffer_load_dword s26, s[0:3], 0xd7 ; C20D01D7 > s_buffer_load_dword s7, s[0:3], 0xb8 ; C20381B8 > s_buffer_load_dword s9, s[0:3], 0xba ; C20481BA > s_buffer_load_dword s10, s[0:3], 0xbb ; C20501BB > s_buffer_load_dword s11, s[0:3], 0xbc ; C20581BC > s_buffer_load_dword s13, s[0:3], 0xbe ; C20681BE > s_buffer_load_dword s14, s[0:3], 0xbf ; C20701BF > s_buffer_load_dword s15, s[0:3], 0xc0 ; C20781C0 > s_buffer_load_dword s17, s[0:3], 0xc2 ; C20881C2 > s_buffer_load_dword s18, s[0:3], 0xc3 ; C20901C3 > s_buffer_load_dword s19, s[0:3], 0xc4 ; C20981C4 > s_buffer_load_dword s21, s[0:3], 0xc6 ; C20A81C6 > s_buffer_load_dword s22, s[0:3], 0xc7 ; C20B01C7 > s_buffer_load_dword s0, s[0:3], 0xdf ; C20001DF > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s5, v4 ; 10020805 > v_mul_f32_e32 v0, s4, v3 ; 10000604 > v_mul_f32_e32 v3, s6, v5 ; 10060A06 > v_mul_f32_e32 v5, s28, v1 ; 100A021C > v_mul_f32_e32 v4, s24, v1 ; 10080218 > v_mul_f32_e32 v1, s32, v1 ; 10020220 > v_mac_f32_e32 v5, s27, v0 ; 3E0A001B > v_mac_f32_e32 v4, s23, v0 ; 3E080017 > v_mac_f32_e32 v1, s31, v0 ; 3E02001F > v_mac_f32_e32 v5, s29, v3 ; 3E0A061D > v_mac_f32_e32 v1, s33, v3 ; 3E020621 > v_mac_f32_e32 v4, s25, v3 ; 3E080619 > v_add_f32_e32 v3, s30, v5 ; 06060A1E > v_add_f32_e32 v0, s26, v4 ; 0600081A > v_mul_f32_e32 v4, s8, v3 ; 10080608 > v_mul_f32_e32 v5, s12, v3 ; 100A060C > v_mul_f32_e32 v6, s16, v3 ; 100C0610 > v_mul_f32_e32 v3, s20, v3 ; 10060614 > v_add_f32_e32 v1, s0, v1 ; 06020200 > v_mac_f32_e32 v4, s7, v0 ; 3E080007 > v_mac_f32_e32 v5, s11, v0 ; 3E0A000B > v_mac_f32_e32 v6, s15, v0 ; 3E0C000F > v_mac_f32_e32 v3, s19, v0 ; 3E060013 > v_mac_f32_e32 v3, s21, v1 ; 3E060215 > v_mac_f32_e32 v4, s9, v1 ; 3E080209 > v_mac_f32_e32 v5, s13, v1 ; 3E0A020D > v_mac_f32_e32 v6, s17, v1 ; 3E0C0211 > v_add_f32_e32 v0, s10, v4 ; 0600080A > v_add_f32_e32 v1, s14, v5 ; 06020A0E > v_add_f32_e32 v4, s18, v6 ; 06080C12 > v_add_f32_e32 v3, s22, v3 ; 06060616 > exp 15, 12, 0, 1, 0, v0, v1, v4, v3 ; F80008CF 03040100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 40 >VGPRS: 8 >Code Size: 292 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > 
s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s18, s[4:7], 0x55 ; C2090555 > s_buffer_load_dword s17, s[4:7], 0x54 ; C2088554 > s_buffer_load_dword s19, s[4:7], 0x56 ; C2098556 > s_buffer_load_dword s41, s[4:7], 0xd9 ; C21485D9 > s_buffer_load_dword s37, s[4:7], 0xd5 ; C21285D5 > s_buffer_load_dword s40, s[4:7], 0xd8 ; C21405D8 > s_buffer_load_dword s45, s[4:7], 0xdd ; C21685DD > s_buffer_load_dword s36, s[4:7], 0xd4 ; C21205D4 > s_buffer_load_dword s42, s[4:7], 0xda ; C21505DA > s_buffer_load_dword s44, s[4:7], 0xdc ; C21605DC > s_buffer_load_dword s38, s[4:7], 0xd6 ; C21305D6 > s_buffer_load_dword s43, s[4:7], 0xdb ; C21585DB > s_buffer_load_dword s46, s[4:7], 0xde ; C21705DE > s_buffer_load_dword s21, s[4:7], 0xb9 ; C20A85B9 > s_buffer_load_dword s25, s[4:7], 0xbd ; C20C85BD > s_buffer_load_dword s29, s[4:7], 0xc1 ; C20E85C1 > s_buffer_load_dword s33, s[4:7], 0xc5 ; C21085C5 > s_buffer_load_dword s39, s[4:7], 0xd7 ; C21385D7 > s_buffer_load_dword s20, s[4:7], 0xb8 ; C20A05B8 > s_buffer_load_dword s24, s[4:7], 0xbc ; C20C05BC > s_buffer_load_dword s28, s[4:7], 0xc0 ; C20E05C0 > s_buffer_load_dword s32, s[4:7], 0xc4 ; C21005C4 > s_buffer_load_dword s47, s[4:7], 0xdf ; C21785DF > s_buffer_load_dword s22, s[4:7], 0xba ; C20B05BA > s_buffer_load_dword s26, s[4:7], 0xbe ; C20D05BE > s_buffer_load_dword s30, s[4:7], 0xc2 ; C20F05C2 > s_buffer_load_dword s34, s[4:7], 0xc6 ; C21105C6 > s_buffer_load_dword s8, s[4:7], 0x30 ; C2040530 > s_buffer_load_dword s9, s[4:7], 0x31 ; C2048531 > s_buffer_load_dword s10, s[4:7], 0x32 ; C2050532 > s_buffer_load_dword s11, s[4:7], 0x34 ; C2058534 > s_buffer_load_dword s12, s[4:7], 0x35 ; C2060535 > s_buffer_load_dword s13, s[4:7], 0x36 ; C2068536 > s_buffer_load_dword s14, s[4:7], 0x38 ; C2070538 > s_buffer_load_dword s15, s[4:7], 0x39 ; C2078539 > s_buffer_load_dword s16, s[4:7], 0x3a ; C208053A > s_buffer_load_dword s23, s[4:7], 0xbb ; C20B85BB > s_buffer_load_dword s27, s[4:7], 0xbf ; C20D85BF > s_buffer_load_dword s31, s[4:7], 0xc3 ; C20F85C3 > s_buffer_load_dword s35, s[4:7], 0xc7 ; C21185C7 > s_buffer_load_dword s48, s[4:7], 0xe4 ; C21805E4 > s_buffer_load_dword s49, s[4:7], 0xe5 ; C21885E5 > s_buffer_load_dword s4, s[4:7], 0xe6 ; C20205E6 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > s_buffer_load_dword s7, s[0:3], 0x56 ; C2038156 > s_buffer_load_dword s50, s[0:3], 0x57 ; C2190157 > s_buffer_load_dword s51, s[0:3], 0x60 ; C2198160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s18, v4 ; 10020812 > v_mul_f32_e32 v0, s17, v3 ; 10000611 > v_mul_f32_e32 v3, s19, v5 ; 10060A13 > v_mul_f32_e32 v5, s41, v1 ; 100A0229 > v_mul_f32_e32 v4, s37, v1 ; 10080225 > v_mul_f32_e32 v1, s45, v1 ; 1002022D > v_mac_f32_e32 v5, s40, v0 ; 3E0A0028 > v_mac_f32_e32 v4, s36, v0 ; 3E080024 > v_mac_f32_e32 v1, s44, v0 ; 3E02002C > v_mac_f32_e32 v5, s42, v3 ; 3E0A062A > v_mac_f32_e32 v1, s46, v3 ; 3E02062E > v_mac_f32_e32 v4, s38, v3 ; 3E080626 > v_add_f32_e32 v3, s43, v5 ; 06060A2B > v_add_f32_e32 v0, s39, v4 ; 06000827 > v_mul_f32_e32 v4, s21, v3 ; 10080615 > v_mul_f32_e32 v5, s25, v3 ; 100A0619 > v_mul_f32_e32 v6, s29, v3 ; 100C061D > v_mul_f32_e32 v7, s33, v3 ; 100E0621 > v_add_f32_e32 v1, s47, v1 ; 
0602022F > v_mac_f32_e32 v4, s20, v0 ; 3E080014 > v_mac_f32_e32 v5, s24, v0 ; 3E0A0018 > v_mac_f32_e32 v6, s28, v0 ; 3E0C001C > v_mac_f32_e32 v7, s32, v0 ; 3E0E0020 > v_mac_f32_e32 v4, s22, v1 ; 3E080216 > v_mac_f32_e32 v5, s26, v1 ; 3E0A021A > v_mac_f32_e32 v6, s30, v1 ; 3E0C021E > v_mac_f32_e32 v7, s34, v1 ; 3E0E0222 > v_sub_f32_e32 v9, s4, v1 ; 08120204 > v_add_f32_e32 v1, s23, v4 ; 06020817 > v_add_f32_e32 v4, s27, v5 ; 06080A1B > v_add_f32_e32 v5, s31, v6 ; 060A0C1F > v_add_f32_e32 v6, s35, v7 ; 060C0E23 > v_sub_f32_e32 v8, s48, v0 ; 08100030 > v_mul_f32_e32 v11, s51, v6 ; 10160C33 > v_mul_f32_e32 v12, s0, v6 ; 10180C00 > v_sub_f32_e32 v3, s49, v3 ; 08060631 > v_mul_f32_e32 v0, s8, v8 ; 10001008 > v_mul_f32_e32 v13, s11, v8 ; 101A100B > v_mul_f32_e32 v8, s14, v8 ; 1010100E > v_mul_f32_e32 v7, s5, v6 ; 100E0C05 > v_fma_f32 v11, v1, s51, v11 ; D296000B 042C6701 > v_fma_f32 v12, v4, -s0, v12 ; D296000C 44300104 > v_mul_f32_e32 v10, s6, v6 ; 10140C06 > v_mac_f32_e32 v0, s9, v3 ; 3E000609 > v_mac_f32_e32 v13, s12, v3 ; 3E1A060C > v_mac_f32_e32 v8, s15, v3 ; 3E10060F > v_fma_f32 v7, v11, s7, v7 ; D2960007 041C0F0B > v_fma_f32 v10, v12, s50, v10 ; D296000A 0428650C > exp 15, 32, 0, 0, 0, v11, v12, v5, v6 ; F800020F 06050C0B > v_mac_f32_e32 v0, s10, v9 ; 3E00120A > v_mac_f32_e32 v13, s13, v9 ; 3E1A120D > v_mac_f32_e32 v8, s16, v9 ; 3E101210 > exp 15, 33, 0, 0, 0, v7, v10, v5, v6 ; F800021F 06050A07 > exp 15, 34, 0, 0, 0, v0, v13, v8, v0 ; F800022F 00080D00 > exp 15, 12, 0, 1, 0, v1, v4, v5, v6 ; F80008CF 06050401 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 16 >Code Size: 492 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_cndmask_b32_e64 v6, v0, 1.0, vcc ; D2000006 01A9E500 > v_bfrev_b32_e32 v9, 14 ; 7E12708E > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > v_cndmask_b32_e64 v7, v1, 1.0, vcc ; D2000007 01A9E501 > v_bfrev_b32_e32 v10, 15 ; 7E14708F > v_mul_f32_e32 v8, v9, v6 ; 10100D09 > v_cmp_le_f32_e32 vcc, 0, v6 ; 7C060C80 > v_cndmask_b32_e32 v11, v10, v8 ; 0016110A > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v14, v2, 1, 1, [m0] ; C8380502 > v_mul_f32_e32 v6, v9, v7 ; 100C0F09 > v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80 > v_cndmask_b32_e32 v12, v10, v6 ; 00180D0A > v_interp_p2_f32 v14, [v14], v3, 1, 1, [m0] ; C8390503 > v_interp_p1_f32 v6, v2, 0, 2, [m0] ; C8180802 > v_interp_p2_f32 v6, [v6], v3, 0, 2, [m0] ; C8190803 > v_interp_p1_f32 v7, v2, 1, 2, [m0] ; C81C0902 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_interp_p2_f32 v7, [v7], v3, 1, 2, [m0] ; C81D0903 > v_interp_p1_f32 v8, v2, 2, 2, [m0] ; C8200A02 > v_rcp_f32_e32 v2, v4 ; 7E045504 > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_cndmask_b32_e32 v19, v1, v12 ; 00261901 > s_load_dwordx8 s[24:31], s[4:5], 0x10 ; C0CC0510 > 
s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s23 ; 87001700 > v_cndmask_b32_e32 v18, v0, v11 ; 00241700 > v_interp_p2_f32 v8, [v8], v3, 2, 2, [m0] ; C8210A03 > image_sample v[22:25], v[18:19], s[16:23], s[0:3] dmask:0xf ; F0800F00 00041612 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > v_cndmask_b32_e64 v1, v5, 1.0, s[0:1] ; D2000001 0001E505 > v_cmp_lt_f32_e64 s[0:1], 0, v14 ; D0020000 00021C80 > v_cndmask_b32_e64 v3, v14, 1.0, s[0:1] ; D2000003 0001E50E > v_cmp_le_f32_e64 s[0:1], 0, v1 ; D0060000 00020280 > v_mul_f32_e32 v1, v9, v1 ; 10020309 > s_buffer_load_dword s7, s[12:15], 0x11 ; C2038D11 > v_cndmask_b32_e64 v1, v10, v1, s[0:1] ; D2000001 0002030A > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_mul_f32_e32 v3, v9, v3 ; 10060709 > s_buffer_load_dword s6, s[12:15], 0x10 ; C2030D10 > v_cndmask_b32_e64 v3, v10, v3, s[0:1] ; D2000003 0002070A > v_mul_f32_e32 v4, v2, v5 ; 10080B02 > v_mul_f32_e32 v2, v2, v14 ; 10041D02 > s_and_b32 s32, s32, s31 ; 87201F20 > s_buffer_load_dword s8, s[12:15], 0x12 ; C2040D12 > image_sample v0, v[18:19], s[24:31], s[32:35] dmask:0x1 ; F0800100 01060012 > v_cndmask_b32_e32 v2, v2, v3 ; 00040702 > s_buffer_load_dword s0, s[12:15], 0x13 ; C2000D13 > s_buffer_load_dword s2, s[12:15], 0x15 ; C2010D15 > v_cndmask_b32_e32 v1, v4, v1 ; 00020304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > s_buffer_load_dword s1, s[12:15], 0x14 ; C2008D14 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s7, v2 ; 10060407 > v_mul_f32_e32 v1, v0, v1 ; 10020300 > v_mac_f32_e32 v3, s6, v1 ; 3E060206 > s_buffer_load_dword s9, s[12:15], 0x19 ; C2048D19 > v_mac_f32_e32 v3, s8, v0 ; 3E060008 > s_buffer_load_dword s7, s[12:15], 0x18 ; C2038D18 > v_add_f32_e32 v9, s0, v3 ; 06120600 > v_mul_f32_e32 v3, s2, v2 ; 10060402 > s_buffer_load_dword s3, s[12:15], 0x16 ; C2018D16 > s_buffer_load_dword s0, s[12:15], 0x1a ; C2000D1A > v_mac_f32_e32 v3, s1, v1 ; 3E060201 > s_load_dwordx8 s[16:23], s[4:5], 0x20 ; C0C80520 > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > s_buffer_load_dword s6, s[12:15], 0x17 ; C2030D17 > s_buffer_load_dword s1, s[12:15], 0x1b ; C2008D1B > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s9, v2 ; 10040409 > v_mac_f32_e32 v2, s7, v1 ; 3E040207 > v_mac_f32_e32 v3, s3, v0 ; 3E060003 > v_mac_f32_e32 v2, s0, v0 ; 3E040000 > v_add_f32_e32 v10, s6, v3 ; 06140606 > v_add_f32_e32 v11, s1, v2 ; 06160401 > v_mov_b32_e32 v12, 0 ; 7E180280 > s_and_b32 s24, s24, s23 ; 87181718 > image_sample_l v[32:35], v[9:12], s[16:23], s[24:27] dmask:0xf ; F0900F00 00C42009 > s_load_dwordx8 s[28:35], s[4:5], 0x30 ; C0CE0530 > s_load_dwordx4 s[36:39], s[4:5], 0x3c ; C092053C > s_load_dwordx8 s[16:23], s[4:5], 0x40 ; C0C80540 > s_load_dwordx4 s[24:27], s[4:5], 0x4c ; C08C054C > s_load_dwordx8 s[40:47], s[4:5], 0x60 ; C0D40560 > s_buffer_load_dword s2, s[12:15], 0x30 ; C2010D30 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s36, s36, s35 ; 87242324 > image_sample_l v[3:5], v[9:12], s[28:35], s[36:39] dmask:0x7 ; F0900700 01270309 > s_and_b32 s24, s24, s23 ; 87181718 > image_sample_l v[0:2], v[9:12], s[16:23], s[24:27] dmask:0x7 ; F0900700 00C40009 > s_load_dwordx4 s[24:27], s[4:5], 0x6c ; C08C056C > s_load_dwordx8 s[28:35], s[4:5], 0x50 ; C0CE0550 > s_load_dwordx4 s[36:39], s[4:5], 0x5c ; C092055C > s_buffer_load_dword s3, s[12:15], 0x31 ; C2018D31 > v_mov_b32_e32 v20, v12 ; 7E28030C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s47 ; 87182F18 > s_buffer_load_dword s0, s[12:15], 0x34 
; C2000D34 > s_and_b32 s36, s36, s35 ; 87242324 > image_sample v[26:29], v[18:19], s[28:35], s[36:39] dmask:0xf ; F0800F00 01271A12 > image_sample_l v[9:12], v[18:21], s[40:47], s[24:27] dmask:0xf ; F0900F00 00CA0912 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v11, v26, 2.0, -1.0 ; D296000B 03CDE91A > v_mul_f32_e32 v23, s2, v11 ; 102E1602 > v_fma_f32 v12, v27, 2.0, -1.0 ; D296000C 03CDE91B > s_buffer_load_dword s2, s[12:15], 0x38 ; C2010D38 > v_mac_f32_e32 v23, s3, v12 ; 3E2E1803 > s_buffer_load_dword s1, s[12:15], 0x35 ; C2008D35 > s_buffer_load_dword s3, s[12:15], 0x39 ; C2018D39 > s_buffer_load_dword s6, s[12:15], 0x32 ; C2030D32 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v24, s0, v11 ; 10301600 > s_buffer_load_dword s7, s[12:15], 0x36 ; C2038D36 > s_buffer_load_dword s0, s[12:15], 0x3a ; C2000D3A > v_mul_f32_e32 v11, s2, v11 ; 10161602 > v_mac_f32_e32 v24, s1, v12 ; 3E301801 > v_mac_f32_e32 v11, s3, v12 ; 3E161803 > v_fma_f32 v12, v28, 2.0, -1.0 ; D296000C 03CDE91C > v_mac_f32_e32 v23, s6, v12 ; 3E2E1806 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v24, s7, v12 ; 3E301807 > v_mac_f32_e32 v11, s0, v12 ; 3E161800 > v_mul_f32_e32 v12, v23, v23 ; 10182F17 > v_mac_f32_e32 v12, v24, v24 ; 3E183118 > v_mac_f32_e32 v12, v11, v11 ; 3E18170B > v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C > s_load_dwordx8 s[16:23], s[4:5], 0x70 ; C0C80570 > s_load_dwordx4 s[28:31], s[4:5], 0x7c ; C08E057C > v_mul_f32_e32 v31, v33, v35 ; 103E4721 > v_mul_f32_e32 v26, v23, v12 ; 10341917 > v_mul_f32_e64 v23, |v26|, v26 ; D2100117 0002351A > v_cmp_ge_f32_e32 vcc, 0, v23 ; 7C0C2E80 > v_cndmask_b32_e64 v25, 1.0, 0, vcc ; D2000019 01A900F2 > v_mul_f32_e32 v25, v25, v3 ; 10320719 > v_cndmask_b32_e64 v27, 0, 1.0, vcc ; D200001B 01A9E480 > v_fma_f32 v25, v0, v27, v25 ; D2960019 04663700 > v_mul_f32_e32 v27, v24, v12 ; 10361918 > v_mul_f32_e64 v24, |v27|, v27 ; D2100118 0002371B > v_cmp_ge_f32_e32 vcc, 0, v24 ; 7C0C3080 > v_cndmask_b32_e64 v28, 1.0, 0, vcc ; D200001C 01A900F2 > v_mul_f32_e32 v28, v28, v4 ; 1038091C > v_cndmask_b32_e64 v33, 0, 1.0, vcc ; D2000021 01A9E480 > v_mul_f32_e64 v23, v25, |v23| ; D2100217 00022F19 > v_fma_f32 v28, v1, v33, v28 ; D296001C 04724301 > v_mad_f32 v23, v28, |v24|, v23 ; D2820217 045E311C > v_mul_f32_e32 v28, v11, v12 ; 1038190B > v_mul_f32_e64 v11, |v28|, v28 ; D210010B 0002391C > v_cmp_ge_f32_e32 vcc, 0, v11 ; 7C0C1680 > s_load_dwordx4 s[32:35], s[4:5], 0x8c ; C090058C > s_load_dwordx8 s[48:55], s[4:5], 0x80 ; C0D80580 > s_buffer_load_dword s1, s[12:15], 0x40 ; C2008D40 > s_buffer_load_dword s2, s[12:15], 0x41 ; C2010D41 > s_buffer_load_dword s3, s[12:15], 0x42 ; C2018D42 > v_cndmask_b32_e64 v12, 1.0, 0, vcc ; D200000C 01A900F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s28, s28, s23 ; 871C171C > image_sample v[14:17], v[18:19], s[16:23], s[28:31] dmask:0xf ; F0800F00 00E40E12 > s_load_dwordx4 s[16:19], s[4:5], 0x9c ; C088059C > s_buffer_load_dword s20, s[12:15], 0x43 ; C20A0D43 > v_mul_f32_e32 v12, v12, v5 ; 10180B0C > v_cndmask_b32_e64 v24, 0, 1.0, vcc ; D2000018 01A9E480 > s_buffer_load_dword s21, s[12:15], 0x48 ; C20A8D48 > s_buffer_load_dword s22, s[12:15], 0x49 ; C20B0D49 > s_buffer_load_dword s23, s[12:15], 0x4a ; C20B8D4A > s_buffer_load_dword s11, s[12:15], 0x78 ; C2058D78 > v_fma_f32 v12, v2, v24, v12 ; D296000C 04323102 > v_mad_f32 v11, v12, |v11|, v23 ; D282020B 045E170C > v_mul_f32_e32 v30, v32, v35 ; 103C4720 > v_mul_f32_e32 v32, v34, v35 ; 10404722 > v_mul_f32_e32 v12, v11, v10 ; 1018150B > v_mul_f32_e32 v10, s1, v30 ; 10143C01 > v_mul_f32_e32 v11, s2, v31 ; 
10163E02 > s_and_b32 s32, s32, s55 ; 87203720 > v_mul_f32_e32 v23, s3, v32 ; 102E4003 > image_sample v[18:21], v[18:19], s[48:55], s[32:35] dmask:0xf ; F0800F00 010C1212 > v_mul_f32_e32 v10, v12, v10 ; 1014150C > v_mul_f32_e32 v11, v12, v11 ; 1016170C > v_mul_f32_e32 v12, v12, v23 ; 10182F0C > v_mov_b32_e32 v23, 0 ; 7E2E0280 > v_mov_b32_e32 v24, 0 ; 7E300280 > v_mov_b32_e32 v25, 0 ; 7E320280 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_saveexec_b64 s[12:13], vcc ; BE8C246A > s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E > s_cbranch_execz BB0_2 ; BF880000 > v_mov_b32_e32 v23, 0x3f028283 ; 7E2E02FF 3F028283 > v_cmp_gt_f32_e32 vcc, v23, v29 ; 7C083B17 > v_max3_f32 v29, v16, v15, v14 ; D2A8001D 043A1F10 > v_cmp_eq_f32_e64 s[0:1], 0, v29 ; D0040000 00023A80 > v_rcp_f32_e32 v29, v29 ; 7E3A551D > v_cmp_lt_f32_e64 s[2:3], 0, v14 ; D0020002 00021C80 > v_cmp_lt_f32_e64 s[6:7], 0, v15 ; D0020006 00021E80 > v_cmp_lt_f32_e64 s[8:9], 0, v16 ; D0020008 00022080 > v_cndmask_b32_e64 v33, v14, 1.0, s[2:3] ; D2000021 0009E50E > v_cndmask_b32_e64 v34, v15, 1.0, s[6:7] ; D2000022 0019E50F > v_cndmask_b32_e64 v35, v16, 1.0, s[8:9] ; D2000023 0021E510 > v_bfrev_b32_e32 v36, 14 ; 7E48708E > v_cmp_le_f32_e64 s[2:3], 0, v33 ; D0060002 00024280 > v_cmp_le_f32_e64 s[6:7], 0, v34 ; D0060006 00024480 > v_cmp_le_f32_e64 s[8:9], 0, v35 ; D0060008 00024680 > v_mul_f32_e32 v33, v36, v33 ; 10424324 > v_bfrev_b32_e32 v37, 15 ; 7E4A708F > v_mul_f32_e32 v34, v36, v34 ; 10444524 > v_mul_f32_e32 v35, v36, v35 ; 10464724 > v_cndmask_b32_e64 v33, v37, v33, s[2:3] ; D2000021 000A4325 > v_mul_f32_e32 v36, v29, v14 ; 10481D1D > v_cndmask_b32_e64 v34, v37, v34, s[6:7] ; D2000022 001A4525 > v_cndmask_b32_e64 v35, v37, v35, s[8:9] ; D2000023 00224725 > v_mul_f32_e32 v37, v29, v15 ; 104A1F1D > v_mul_f32_e32 v29, v29, v16 ; 103A211D > v_cndmask_b32_e64 v33, v36, v33, s[0:1] ; D2000021 00024324 > v_cndmask_b32_e64 v34, v37, v34, s[0:1] ; D2000022 00024525 > v_cndmask_b32_e64 v29, v29, v35, s[0:1] ; D200001D 0002471D > v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 > v_add_f32_e64 v34, 0, v34 clamp ; D2060822 00024480 > v_add_f32_e64 v29, 0, v29 clamp ; D206081D 00023A80 > v_add_f32_e64 v35, 0, v10 clamp ; D2060823 00021480 > v_mul_f32_e32 v33, v33, v33 ; 10424321 > v_add_f32_e64 v36, 0, v11 clamp ; D2060824 00021680 > v_mul_f32_e32 v34, v34, v34 ; 10444522 > v_add_f32_e64 v37, 0, v12 clamp ; D2060825 00021880 > v_mul_f32_e32 v29, v29, v29 ; 103A3B1D > v_mul_f32_e32 v23, v14, v10 ; 102E150E > v_mul_f32_e32 v24, v15, v11 ; 1030170F > v_mul_f32_e32 v25, v16, v12 ; 10321910 > v_mul_f32_e32 v33, v35, v33 ; 10424323 > v_mov_b32_e32 v35, 0x3e19999a ; 7E4602FF 3E19999A > v_mul_f32_e32 v34, v36, v34 ; 10444524 > v_mul_f32_e32 v29, v37, v29 ; 103A3B25 > v_mac_f32_e32 v23, v14, v10 ; 3E2E150E > v_mul_f32_e32 v33, v35, v33 ; 10424323 > v_mac_f32_e32 v24, v15, v11 ; 3E30170F > v_mul_f32_e32 v34, v35, v34 ; 10444523 > v_mac_f32_e32 v25, v16, v12 ; 3E321910 > v_mul_f32_e32 v29, v35, v29 ; 103A3B23 > v_min_f32_e32 v21, 0.5, v21 ; 1E2A2AF0 > v_cndmask_b32_e32 v24, v34, v24 ; 00303122 > v_cndmask_b32_e32 v23, v33, v23 ; 002E2F21 > v_cndmask_b32_e32 v29, v29, v25 ; 003A331D > v_mul_f32_e32 v25, v23, v21 ; 10322B17 > v_mul_f32_e32 v24, v24, v21 ; 10302B18 > v_mul_f32_e32 v23, v29, v21 ; 102E2B1D > s_or_b64 exec, exec, s[12:13] ; 88FE0C7E > v_mul_f32_e32 v29, s22, v31 ; 103A3E16 > v_sub_f32_e64 v31, 1.0, s20 ; D208001F 000028F2 > v_fma_f32 v22, s20, v22, v31 ; D2960016 047E2C14 > 
v_mul_f32_e32 v31, v6, v6 ; 103E0D06 > v_mac_f32_e32 v31, v7, v7 ; 3E3E0F07 > v_mac_f32_e32 v31, v8, v8 ; 3E3E1108 > v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F > v_mul_f32_e32 v21, s21, v30 ; 102A3C15 > v_mul_f32_e32 v30, s23, v32 ; 103C4017 > v_bfrev_b32_e32 v32, 1 ; 7E407081 > v_mul_f32_e32 v6, v6, v31 ; 100C3F06 > v_mul_f32_e32 v7, v7, v31 ; 100E3F07 > v_mul_f32_e32 v8, v8, v31 ; 10103F08 > v_mul_f32_e32 v31, v26, v6 ; 103E0D1A > v_mac_f32_e32 v31, v27, v7 ; 3E3E0F1B > v_mac_f32_e32 v31, v28, v8 ; 3E3E111C > v_mul_f32_e32 v26, v31, v26 ; 1034351F > v_mul_f32_e32 v27, v31, v27 ; 1036371F > v_mul_f32_e32 v28, v31, v28 ; 1038391F > v_fma_f32 v26, -v26, 2.0, v6 ; D296001A 2419E91A > v_fma_f32 v27, -v27, 2.0, v7 ; D296001B 241DE91B > v_fma_f32 v28, -v28, 2.0, v8 ; D296001C 2421E91C > v_xor_b32_e32 v31, v26, v32 ; 3A3E411A > v_xor_b32_e32 v35, v27, v32 ; 3A46411B > v_xor_b32_e32 v32, v28, v32 ; 3A40411C > v_cubema_f32 v36, v31, v35, v32 ; D28E0024 0482471F > s_load_dwordx8 s[0:7], s[4:5], 0x90 ; C0C00590 > v_rcp_f32_e64 v36, |v36| ; D3540124 00000124 > v_cubetc_f32 v37, v31, v35, v32 ; D28C0025 0482471F > v_mov_b32_e32 v38, 0x3fc00000 ; 7E4C02FF 3FC00000 > v_sqrt_f32_e64 v33, |v18| ; D3660121 00000112 > v_mov_b32_e32 v34, 0x41000000 ; 7E4402FF 41000000 > v_mad_f32 v39, v36, v37, v38 ; D2820027 049A4B24 > v_cubesc_f32 v37, v31, v35, v32 ; D28A0025 0482471F > v_mac_f32_e32 v38, v36, v37 ; 3E4C4B24 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s16, s16, s7 ; 87100710 > v_cubeid_f32 v40, v31, v35, v32 ; D2880028 0482471F > v_fma_f32 v41, -v33, v34, s11 ; D2960029 202E4521 > image_sample_l v[31:34], v[38:41], s[0:7], s[16:19] dmask:0xf ; F0900F00 00801F26 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v35, v31, v34 ; 1046451F > v_mov_b32_e32 v36, 0x41200000 ; 7E4802FF 41200000 > v_mul_f32_e32 v37, v32, v34 ; 104A4520 > v_mul_f32_e32 v34, v33, v34 ; 10444521 > v_fma_f32 v34, v34, v36, -v33 ; D2960022 84864922 > v_fma_f32 v33, v18, v34, v33 ; D2960021 04864512 > v_mul_f32_e64 v34, |v26|, v26 ; D2100122 0002351A > v_cmp_le_f32_e32 vcc, 0, v34 ; 7C064480 > v_mul_f32_e64 v34, |v27|, v27 ; D2100122 0002371B > v_cmp_le_f32_e64 s[0:1], 0, v34 ; D0060000 00024480 > v_mul_f32_e64 v34, |v28|, v28 ; D2100122 0002391C > v_cmp_le_f32_e64 s[2:3], 0, v34 ; D0060002 00024480 > v_cndmask_b32_e64 v34, 1.0, 0, vcc ; D2000022 01A900F2 > v_mul_f32_e32 v3, v34, v3 ; 10060722 > v_cndmask_b32_e64 v34, 1.0, 0, s[0:1] ; D2000022 000100F2 > v_mul_f32_e32 v4, v34, v4 ; 10080922 > v_cndmask_b32_e64 v34, 1.0, 0, s[2:3] ; D2000022 000900F2 > v_mul_f32_e32 v5, v34, v5 ; 100A0B22 > v_cndmask_b32_e64 v34, 0, 1.0, vcc ; D2000022 01A9E480 > v_fma_f32 v0, v0, v34, v3 ; D2960000 040E4500 > v_cndmask_b32_e64 v3, 0, 1.0, s[0:1] ; D2000003 0001E480 > v_fma_f32 v1, v1, v3, v4 ; D2960001 04120701 > v_mul_f32_e64 v0, v0, |v26| ; D2100200 00023500 > v_cndmask_b32_e64 v3, 0, 1.0, s[2:3] ; D2000003 0009E480 > v_fma_f32 v2, v2, v3, v5 ; D2960002 04160702 > v_mad_f32 v0, v1, |v27|, v0 ; D2820200 04023701 > v_add_f32_e32 v1, 0x3d4ccccd, v18 ; 060224FF 3D4CCCCD > v_mad_f32 v0, v2, |v28|, v0 ; D2820200 04023902 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mul_f32_e32 v0, v0, v1 ; 10000300 > v_mul_f32_e32 v1, v6, v26 ; 10023506 > v_mac_f32_e32 v1, v7, v27 ; 3E023707 > v_mac_f32_e32 v1, v8, v28 ; 3E023908 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_mul_f32_e32 v2, v1, v1 ; 10040301 > v_mul_f32_e32 v2, v2, v2 ; 10040502 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_sub_f32_e32 v2, 1.0, v20 ; 080428F2 > v_mul_f32_e32 v0, v0, v9 ; 
10001300 > v_fma_f32 v1, v2, v1, v20 ; D2960001 04520302 > v_mul_f32_e32 v0, v0, v1 ; 10000300 > v_fma_f32 v35, v35, v36, -v31 ; D2960023 847E4923 > v_fma_f32 v37, v37, v36, -v32 ; D2960025 84824925 > v_fma_f32 v31, v18, v35, v31 ; D296001F 047E4712 > v_mul_f32_e32 v1, v0, v21 ; 10022B00 > v_mul_f32_e32 v2, v0, v29 ; 10043B00 > v_mul_f32_e32 v0, v0, v30 ; 10003D00 > v_fma_f32 v32, v18, v37, v32 ; D2960020 04824B12 > v_mul_f32_e32 v1, v31, v1 ; 1002031F > v_mul_f32_e32 v2, v32, v2 ; 10040520 > v_mul_f32_e32 v0, v33, v0 ; 10000121 > v_mul_f32_e32 v1, v1, v19 ; 10022701 > v_mul_f32_e32 v2, v2, v19 ; 10042702 > v_mul_f32_e32 v0, v0, v19 ; 10002700 > v_fma_f32 v1, v10, v14, v1 ; D2960001 04061D0A > v_fma_f32 v2, v11, v15, v2 ; D2960002 040A1F0B > v_fma_f32 v0, v12, v16, v0 ; D2960000 0402210C > v_add_f32_e32 v1, v1, v25 ; 06023301 > v_mul_f32_e32 v3, v17, v17 ; 10062311 > v_add_f32_e32 v2, v2, v24 ; 06043102 > v_add_f32_e32 v0, v0, v23 ; 06002F00 > v_mul_f32_e32 v1, v3, v1 ; 10020303 > v_mul_f32_e32 v2, v3, v2 ; 10040503 > v_mul_f32_e32 v3, v3, v0 ; 10060103 > v_mul_f32_e32 v0, v22, v1 ; 10000316 > v_mul_f32_e32 v1, v22, v2 ; 10020516 > v_mul_f32_e32 v2, v22, v3 ; 10040716 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 64 >VGPRS: 44 >Code Size: 1932 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s5, s[0:3], 0x55 ; C2028155 > s_buffer_load_dword s4, s[0:3], 0x54 ; C2020154 > s_buffer_load_dword s6, s[0:3], 0x56 ; C2030156 > s_buffer_load_dword s28, s[0:3], 0xd9 ; C20E01D9 > s_buffer_load_dword s24, s[0:3], 0xd5 ; C20C01D5 > s_buffer_load_dword s27, s[0:3], 0xd8 ; C20D81D8 > s_buffer_load_dword s32, s[0:3], 0xdd ; C21001DD > s_buffer_load_dword s23, s[0:3], 0xd4 ; C20B81D4 > s_buffer_load_dword s29, s[0:3], 0xda ; C20E81DA > s_buffer_load_dword s31, s[0:3], 0xdc ; C20F81DC > s_buffer_load_dword s25, s[0:3], 0xd6 ; C20C81D6 > s_buffer_load_dword s30, s[0:3], 0xdb ; C20F01DB > s_buffer_load_dword s33, s[0:3], 0xde ; C21081DE > s_buffer_load_dword s8, s[0:3], 0xb9 ; C20401B9 > s_buffer_load_dword s12, s[0:3], 0xbd ; C20601BD > s_buffer_load_dword s16, s[0:3], 0xc1 ; C20801C1 > s_buffer_load_dword s20, s[0:3], 0xc5 ; C20A01C5 > s_buffer_load_dword s26, s[0:3], 0xd7 ; C20D01D7 > s_buffer_load_dword s7, s[0:3], 0xb8 ; C20381B8 > s_buffer_load_dword s9, s[0:3], 0xba ; C20481BA > s_buffer_load_dword s10, s[0:3], 0xbb ; C20501BB > s_buffer_load_dword s11, s[0:3], 0xbc ; C20581BC > s_buffer_load_dword s13, s[0:3], 0xbe ; C20681BE > s_buffer_load_dword s14, s[0:3], 0xbf ; C20701BF > s_buffer_load_dword s15, s[0:3], 0xc0 ; C20781C0 > s_buffer_load_dword s17, s[0:3], 0xc2 ; C20881C2 > s_buffer_load_dword s18, s[0:3], 0xc3 ; C20901C3 > s_buffer_load_dword s19, s[0:3], 0xc4 ; C20981C4 > s_buffer_load_dword s21, s[0:3], 0xc6 ; C20A81C6 > s_buffer_load_dword s22, s[0:3], 0xc7 ; C20B01C7 > s_buffer_load_dword s0, s[0:3], 0xdf ; C20001DF > 
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s5, v4 ; 10020805 > v_mul_f32_e32 v0, s4, v3 ; 10000604 > v_fma_f32 v3, v5, s6, s6 ; D2960003 00180D05 > v_mul_f32_e32 v5, s28, v1 ; 100A021C > v_mul_f32_e32 v4, s24, v1 ; 10080218 > v_mul_f32_e32 v1, s32, v1 ; 10020220 > v_mac_f32_e32 v5, s27, v0 ; 3E0A001B > v_mac_f32_e32 v4, s23, v0 ; 3E080017 > v_mac_f32_e32 v1, s31, v0 ; 3E02001F > v_mac_f32_e32 v5, s29, v3 ; 3E0A061D > v_mac_f32_e32 v1, s33, v3 ; 3E020621 > v_mac_f32_e32 v4, s25, v3 ; 3E080619 > v_add_f32_e32 v3, s30, v5 ; 06060A1E > v_add_f32_e32 v0, s26, v4 ; 0600081A > v_mul_f32_e32 v4, s8, v3 ; 10080608 > v_mul_f32_e32 v5, s12, v3 ; 100A060C > v_mul_f32_e32 v6, s16, v3 ; 100C0610 > v_mul_f32_e32 v3, s20, v3 ; 10060614 > v_add_f32_e32 v1, s0, v1 ; 06020200 > v_mac_f32_e32 v4, s7, v0 ; 3E080007 > v_mac_f32_e32 v5, s11, v0 ; 3E0A000B > v_mac_f32_e32 v6, s15, v0 ; 3E0C000F > v_mac_f32_e32 v3, s19, v0 ; 3E060013 > v_mac_f32_e32 v3, s21, v1 ; 3E060215 > v_mac_f32_e32 v4, s9, v1 ; 3E080209 > v_mac_f32_e32 v5, s13, v1 ; 3E0A020D > v_mac_f32_e32 v6, s17, v1 ; 3E0C0211 > v_add_f32_e32 v0, s10, v4 ; 0600080A > v_add_f32_e32 v1, s14, v5 ; 06020A0E > v_add_f32_e32 v4, s18, v6 ; 06080C12 > v_add_f32_e32 v3, s22, v3 ; 06060616 > exp 15, 12, 0, 1, 0, v0, v1, v4, v3 ; F80008CF 03040100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 40 >VGPRS: 8 >Code Size: 296 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[4:7], 0x55 ; C2048555 > s_buffer_load_dword s8, s[4:7], 0x54 ; C2040554 > s_buffer_load_dword s10, s[4:7], 0x56 ; C2050556 > s_buffer_load_dword s32, s[4:7], 0xd9 ; C21005D9 > s_buffer_load_dword s28, s[4:7], 0xd5 ; C20E05D5 > s_buffer_load_dword s31, s[4:7], 0xd8 ; C20F85D8 > s_buffer_load_dword s36, s[4:7], 0xdd ; C21205DD > s_buffer_load_dword s27, s[4:7], 0xd4 ; C20D85D4 > s_buffer_load_dword s33, s[4:7], 0xda ; C21085DA > s_buffer_load_dword s35, s[4:7], 0xdc ; C21185DC > s_buffer_load_dword s29, s[4:7], 0xd6 ; C20E85D6 > s_buffer_load_dword s34, s[4:7], 0xdb ; C21105DB > s_buffer_load_dword s37, s[4:7], 0xde ; C21285DE > s_buffer_load_dword s16, s[4:7], 0xbd ; C20805BD > s_buffer_load_dword s24, s[4:7], 0xc5 ; C20C05C5 > s_buffer_load_dword s30, s[4:7], 0xd7 ; C20F05D7 > s_buffer_load_dword s12, s[4:7], 0xb9 ; C20605B9 > s_buffer_load_dword s15, s[4:7], 0xbc ; C20785BC > s_buffer_load_dword s20, s[4:7], 0xc1 ; C20A05C1 > s_buffer_load_dword s23, s[4:7], 0xc4 ; C20B85C4 > s_buffer_load_dword s38, s[4:7], 0xdf ; C21305DF > s_buffer_load_dword s11, s[4:7], 0xb8 ; C20585B8 > s_buffer_load_dword s17, s[4:7], 0xbe ; C20885BE > s_buffer_load_dword s19, s[4:7], 0xc0 ; C20985C0 > 
s_buffer_load_dword s25, s[4:7], 0xc6 ; C20C85C6 > s_buffer_load_dword s13, s[4:7], 0xba ; C20685BA > s_buffer_load_dword s14, s[4:7], 0xbb ; C20705BB > s_buffer_load_dword s18, s[4:7], 0xbf ; C20905BF > s_buffer_load_dword s21, s[4:7], 0xc2 ; C20A85C2 > s_buffer_load_dword s22, s[4:7], 0xc3 ; C20B05C3 > s_buffer_load_dword s26, s[4:7], 0xc7 ; C20D05C7 > s_buffer_load_dword s39, s[4:7], 0xe4 ; C21385E4 > s_buffer_load_dword s40, s[4:7], 0xe5 ; C21405E5 > s_buffer_load_dword s4, s[4:7], 0xe6 ; C20205E6 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > s_buffer_load_dword s7, s[0:3], 0x56 ; C2038156 > s_buffer_load_dword s41, s[0:3], 0x57 ; C2148157 > s_buffer_load_dword s42, s[0:3], 0x60 ; C2150160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s9, v4 ; 10020809 > v_mul_f32_e32 v0, s8, v3 ; 10000608 > v_fma_f32 v3, v5, s10, s10 ; D2960003 00281505 > v_mul_f32_e32 v5, s32, v1 ; 100A0220 > v_mul_f32_e32 v4, s28, v1 ; 1008021C > v_mul_f32_e32 v1, s36, v1 ; 10020224 > v_mac_f32_e32 v5, s31, v0 ; 3E0A001F > v_mac_f32_e32 v4, s27, v0 ; 3E08001B > v_mac_f32_e32 v1, s35, v0 ; 3E020023 > v_mac_f32_e32 v5, s33, v3 ; 3E0A0621 > v_mac_f32_e32 v1, s37, v3 ; 3E020625 > v_mac_f32_e32 v4, s29, v3 ; 3E08061D > v_add_f32_e32 v3, s34, v5 ; 06060A22 > v_add_f32_e32 v0, s30, v4 ; 0600081E > v_mul_f32_e32 v5, s16, v3 ; 100A0610 > v_mul_f32_e32 v7, s24, v3 ; 100E0618 > v_mul_f32_e32 v4, s12, v3 ; 1008060C > v_mul_f32_e32 v6, s20, v3 ; 100C0614 > v_add_f32_e32 v1, s38, v1 ; 06020226 > v_mac_f32_e32 v5, s15, v0 ; 3E0A000F > v_mac_f32_e32 v7, s23, v0 ; 3E0E0017 > v_mac_f32_e32 v4, s11, v0 ; 3E08000B > v_mac_f32_e32 v6, s19, v0 ; 3E0C0013 > v_mac_f32_e32 v5, s17, v1 ; 3E0A0211 > v_mac_f32_e32 v7, s25, v1 ; 3E0E0219 > v_sub_f32_e32 v9, s4, v1 ; 08120204 > v_mac_f32_e32 v4, s13, v1 ; 3E08020D > v_mac_f32_e32 v6, s21, v1 ; 3E0C0215 > v_add_f32_e32 v1, s18, v5 ; 06020A12 > v_add_f32_e32 v5, s26, v7 ; 060A0E1A > v_mul_f32_e32 v11, s0, v5 ; 10160A00 > v_sub_f32_e32 v8, s39, v0 ; 08100027 > v_add_f32_e32 v0, s14, v4 ; 0600080E > v_mul_f32_e32 v10, s42, v5 ; 10140A2A > v_add_f32_e32 v4, s22, v6 ; 06080C16 > v_mul_f32_e32 v6, s5, v5 ; 100C0A05 > v_fma_f32 v10, v0, s42, v10 ; D296000A 04285500 > v_fma_f32 v11, v1, -s0, v11 ; D296000B 442C0101 > v_mul_f32_e32 v7, s6, v5 ; 100E0A06 > v_fma_f32 v6, v10, s7, v6 ; D2960006 04180F0A > v_fma_f32 v7, v11, s41, v7 ; D2960007 041C530B > exp 15, 32, 0, 0, 0, v10, v11, v4, v5 ; F800020F 05040B0A > v_sub_f32_e32 v3, s40, v3 ; 08060628 > exp 15, 33, 0, 0, 0, v6, v7, v4, v5 ; F800021F 05040706 > exp 15, 34, 0, 0, 0, v8, v3, v9, v0 ; F800022F 00090308 > exp 15, 12, 0, 1, 0, v0, v1, v4, v5 ; F80008CF 05040100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 12 >Code Size: 424 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_cndmask_b32_e64 v8, v0, 1.0, vcc ; D2000008 01A9E500 > v_bfrev_b32_e32 v6, 14 ; 7E0C708E > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > 
v_cndmask_b32_e64 v9, v1, 1.0, vcc ; D2000009 01A9E501 > v_bfrev_b32_e32 v7, 15 ; 7E0E708F > v_mul_f32_e32 v10, v6, v8 ; 10141106 > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cndmask_b32_e32 v8, v7, v10 ; 00101507 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > v_mul_f32_e32 v10, v6, v9 ; 10141306 > v_cmp_le_f32_e32 vcc, 0, v9 ; 7C061280 > v_cndmask_b32_e32 v9, v7, v10 ; 00121507 > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v10, v2, 1, 1, [m0] ; C8280502 > v_interp_p2_f32 v10, [v10], v3, 1, 1, [m0] ; C8290503 > v_interp_p1_f32 v12, v2, 0, 2, [m0] ; C8300802 > v_interp_p2_f32 v12, [v12], v3, 0, 2, [m0] ; C8310803 > v_interp_p1_f32 v22, v2, 1, 2, [m0] ; C8580902 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_interp_p2_f32 v22, [v22], v3, 1, 2, [m0] ; C8590903 > v_interp_p1_f32 v23, v2, 2, 2, [m0] ; C85C0A02 > v_rcp_f32_e32 v2, v4 ; 7E045504 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[24:31], s[4:5], 0x0 ; C0CC0500 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_cndmask_b32_e32 v24, v0, v8 ; 00301100 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s31 ; 87001F00 > v_cndmask_b32_e32 v25, v1, v9 ; 00321301 > v_interp_p2_f32 v23, [v23], v3, 2, 2, [m0] ; C85D0A03 > image_sample v0, v[24:25], s[24:31], s[0:3] dmask:0x1 ; F0800100 00060018 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > v_cndmask_b32_e64 v1, v5, 1.0, s[0:1] ; D2000001 0001E505 > v_cmp_lt_f32_e64 s[0:1], 0, v10 ; D0020000 00021480 > v_cndmask_b32_e64 v3, v10, 1.0, s[0:1] ; D2000003 0001E50A > s_buffer_load_dword s7, s[12:15], 0x11 ; C2038D11 > v_mul_f32_e32 v4, v6, v1 ; 10080306 > v_cmp_le_f32_e64 s[0:1], 0, v1 ; D0060000 00020280 > v_cndmask_b32_e64 v1, v7, v4, s[0:1] ; D2000001 00020907 > s_buffer_load_dword s6, s[12:15], 0x10 ; C2030D10 > v_mul_f32_e32 v4, v6, v3 ; 10080706 > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_cndmask_b32_e64 v3, v7, v4, s[0:1] ; D2000003 00020907 > v_mul_f32_e32 v4, v2, v5 ; 10080B02 > v_mul_f32_e32 v2, v2, v10 ; 10041502 > s_buffer_load_dword s8, s[12:15], 0x12 ; C2040D12 > v_cndmask_b32_e32 v2, v2, v3 ; 00040702 > s_buffer_load_dword s0, s[12:15], 0x13 ; C2000D13 > v_cndmask_b32_e32 v1, v4, v1 ; 00020304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s7, v2 ; 10060407 > v_mul_f32_e32 v1, v0, v1 ; 10020300 > v_mac_f32_e32 v3, s6, v1 ; 3E060206 > v_mac_f32_e32 v3, s8, v0 ; 3E060008 > v_add_f32_e32 v26, s0, v3 ; 06340600 > s_buffer_load_dword s0, s[12:15], 0x15 ; C2000D15 > s_buffer_load_dword s1, s[12:15], 0x14 ; C2008D14 > s_buffer_load_dword s7, s[12:15], 0x19 ; C2038D19 > s_buffer_load_dword s6, s[12:15], 0x18 ; C2030D18 > s_buffer_load_dword s2, s[12:15], 0x16 ; C2010D16 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s0, v2 ; 10060400 > s_buffer_load_dword s0, s[12:15], 0x1a ; C2000D1A > v_mac_f32_e32 v3, s1, v1 ; 3E060201 > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > s_load_dwordx8 s[32:39], s[4:5], 0x10 ; C0D00510 > s_load_dwordx4 s[16:19], s[4:5], 0x2c ; C088052C > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > s_buffer_load_dword s3, s[12:15], 0x17 ; C2018D17 > s_buffer_load_dword s1, s[12:15], 0x1b ; C2008D1B > v_mul_f32_e32 v2, s7, v2 ; 10040407 > v_mac_f32_e32 v2, s6, v1 ; 3E040206 > v_mac_f32_e32 v3, s2, v0 ; 3E060002 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v2, s0, v0 ; 3E040000 > 
v_mov_b32_e32 v28, 0 ; 7E380280 > v_add_f32_e32 v27, s3, v3 ; 06360603 > v_add_f32_e32 v29, s1, v2 ; 063A0401 > s_and_b32 s20, s20, s39 ; 87142714 > s_and_b32 s16, s16, s31 ; 87101F10 > v_mov_b32_e32 v30, v28 ; 7E3C031C > image_sample_l v[0:3], v[26:29], s[32:39], s[20:23] dmask:0xf ; F0900F00 00A8001A > image_sample_l v[4:7], v[28:31], s[24:31], s[16:19] dmask:0xf ; F0900F00 0086041C > s_load_dwordx8 s[32:39], s[4:5], 0x30 ; C0D00530 > s_load_dwordx4 s[20:23], s[4:5], 0x3c ; C08A053C > s_load_dwordx8 s[24:31], s[4:5], 0x40 ; C0CC0540 > s_load_dwordx4 s[16:19], s[4:5], 0x4c ; C088054C > s_load_dwordx4 s[40:43], s[4:5], 0x5c ; C094055C > s_load_dwordx8 s[44:51], s[4:5], 0x50 ; C0D60550 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s20, s20, s39 ; 87142714 > image_sample v[14:17], v[24:25], s[32:39], s[20:23] dmask:0xf ; F0800F00 00A80E18 > s_and_b32 s16, s16, s31 ; 87101F10 > image_sample v[18:21], v[24:25], s[24:31], s[16:19] dmask:0xf ; F0800F00 00861218 > s_and_b32 s40, s40, s51 ; 87283328 > image_sample v[8:11], v[24:25], s[44:51], s[40:43] dmask:0xf ; F0800F00 014B0818 > v_mov_b32_e32 v26, v28 ; 7E34031C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, v8, v8 ; 10101108 > v_mov_b32_e32 v27, 0x3b83126f ; 7E3602FF 3B83126F > v_mov_b32_e32 v28, 0x45800000 ; 7E3802FF 45800000 > v_fma_f32 v27, v8, v28, v27 ; D296001B 046E3908 > v_fma_f32 v8, v18, 2.0, -1.0 ; D2960008 03CDE912 > v_mul_f32_e32 v28, v12, v12 ; 1038190C > v_fma_f32 v18, v19, 2.0, -1.0 ; D2960012 03CDE913 > v_mul_f32_e32 v19, v8, v8 ; 10261108 > v_mac_f32_e32 v28, v22, v22 ; 3E382D16 > v_mac_f32_e32 v19, v18, v18 ; 3E262512 > v_fma_f32 v20, v20, 2.0, -1.0 ; D2960014 03CDE914 > v_mac_f32_e32 v19, v20, v20 ; 3E262914 > v_mac_f32_e32 v28, v23, v23 ; 3E382F17 > s_buffer_load_dword s2, s[12:15], 0x3c ; C2010D3C > v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 > s_buffer_load_dword s3, s[12:15], 0x3d ; C2018D3D > v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C > s_buffer_load_dword s6, s[12:15], 0x3e ; C2030D3E > v_mul_f32_e32 v8, v8, v19 ; 10102708 > v_mul_f32_e32 v18, v18, v19 ; 10242712 > v_mul_f32_e32 v19, v20, v19 ; 10262714 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v20, v12, v28, -s2 ; D2960014 800A390C > v_fma_f32 v12, v12, v28, s2 ; D296000C 000A390C > v_fma_f32 v29, v22, v28, -s3 ; D296001D 800E3916 > v_fma_f32 v30, v23, v28, -s6 ; D296001E 801A3917 > v_fma_f32 v22, v22, v28, s3 ; D2960016 000E3916 > v_fma_f32 v23, v23, v28, s6 ; D2960017 001A3917 > v_mul_f32_e32 v28, v20, v20 ; 10382914 > v_mac_f32_e32 v28, v29, v29 ; 3E383B1D > v_mul_f32_e32 v31, v12, v12 ; 103E190C > v_mac_f32_e32 v28, v30, v30 ; 3E383D1E > v_mac_f32_e32 v31, v22, v22 ; 3E3E2D16 > v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C > v_mac_f32_e32 v31, v23, v23 ; 3E3E2F17 > v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F > s_load_dwordx8 s[16:23], s[4:5], 0x60 ; C0C80560 > v_mul_f32_e32 v20, v20, v28 ; 10283914 > v_mul_f32_e32 v29, v29, v28 ; 103A391D > v_mul_f32_e32 v28, v30, v28 ; 1038391E > v_mul_f32_e32 v30, s2, v8 ; 103C1002 > v_mul_f32_e32 v12, v12, v31 ; 10183F0C > v_mad_f32 v30, -s3, v18, -v30 ; D282001E A47A2403 > v_mul_f32_e32 v22, v22, v31 ; 102C3F16 > v_mul_f32_e32 v32, v12, v8 ; 1040110C > v_mul_f32_e32 v23, v23, v31 ; 102E3F17 > v_mul_f32_e32 v31, v20, v8 ; 103E1114 > v_mad_f32 v8, -s6, v19, v30 ; D2820008 247A2606 > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cndmask_b32_e32 v12, v12, v20 ; 0018290C > v_mac_f32_e32 v31, v29, v18 ; 3E3E251D > v_mac_f32_e32 v32, v22, v18 ; 3E402516 > v_cndmask_b32_e32 v18, v22, v29 ; 00243B16 > v_mul_f32_e32 v12, s2, v12 ; 10181802 > 
v_mad_f32 v12, -s3, v18, -v12 ; D282000C A4322403 > v_cndmask_b32_e32 v18, v23, v28 ; 00243917 > v_mac_f32_e32 v31, v28, v19 ; 3E3E271C > v_mac_f32_e32 v32, v23, v19 ; 3E402717 > v_mad_f32 v12, -s6, v18, v12 ; D282000C 24322406 > s_load_dwordx4 s[0:3], s[4:5], 0x6c ; C080056C > v_add_f32_e64 v18, 0, v31 clamp ; D2060812 00023E80 > v_add_f32_e64 v19, 0, v32 clamp ; D2060813 00024080 > v_cndmask_b32_e32 v18, v19, v18 ; 00242513 > v_log_f32_e32 v18, v18 ; 7E244F12 > v_mul_f32_e32 v19, 0x3e99999a, v27 ; 102636FF 3E99999A > v_cndmask_b32_e32 v20, v19, v27 ; 00283713 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s23 ; 87001700 > image_sample_l v[22:25], v[24:27], s[16:23], s[0:3] dmask:0xf ; F0900F00 00041618 > v_mul_f32_e32 v18, v20, v18 ; 10242514 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_exp_f32_e32 v24, v18 ; 7E304B12 > v_add_f32_e32 v18, v8, v17 ; 06242308 > v_add_f32_e32 v19, -1.0, v18 ; 062624F3 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_cndmask_b32_e64 v25, 0, -1, vcc ; D2000019 01A98280 > v_cmp_eq_f32_e64 s[0:1], 0, v17 ; D0040000 00022280 > s_and_saveexec_b64 s[6:7], s[0:1] ; BE862400 > s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E > v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 > v_cndmask_b32_e64 v18, v19, 1.0, vcc ; D2000012 01A9E513 > v_cmp_le_f32_e32 vcc, 0, v18 ; 7C062480 > v_mul_f32_e32 v18, 0x70000000, v18 ; 102424FF 70000000 > v_bfrev_b32_e32 v26, 15 ; 7E34708F > v_cndmask_b32_e32 v18, v26, v18 ; 0024251A > s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 > s_xor_b64 exec, exec, s[6:7] ; 89FE067E > v_rcp_f32_e32 v18, v17 ; 7E245511 > v_mul_f32_e32 v18, v18, v19 ; 10242712 > s_or_b64 exec, exec, s[6:7] ; 88FE067E > s_buffer_load_dword s3, s[12:15], 0x40 ; C2018D40 > s_buffer_load_dword s4, s[12:15], 0x41 ; C2020D41 > s_buffer_load_dword s5, s[12:15], 0x42 ; C2028D42 > s_buffer_load_dword s0, s[12:15], 0x48 ; C2000D48 > s_buffer_load_dword s1, s[12:15], 0x49 ; C2008D49 > s_buffer_load_dword s2, s[12:15], 0x4a ; C2010D4A > v_subrev_f32_e32 v19, v8, v17 ; 0A262308 > v_add_f32_e32 v26, -1.0, v19 ; 063426F3 > v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 > v_cmp_eq_f32_e32 vcc, 0, v17 ; 7C042280 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_saveexec_b64 s[6:7], vcc ; BE86246A > s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E > v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480 > v_cndmask_b32_e64 v19, v26, 1.0, vcc ; D2000013 01A9E51A > v_cmp_le_f32_e32 vcc, 0, v19 ; 7C062680 > v_mul_f32_e32 v19, 0x70000000, v19 ; 102626FF 70000000 > v_bfrev_b32_e32 v27, 15 ; 7E36708F > v_cndmask_b32_e32 v19, v27, v19 ; 0026271B > s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 > s_buffer_load_dword s8, s[12:15], 0x4b ; C2040D4B > s_waitcnt lgkmcnt(0) ; BF8C007F > s_xor_b64 exec, exec, s[6:7] ; 89FE067E > v_rcp_f32_e32 v17, v17 ; 7E225511 > v_mul_f32_e32 v19, v17, v26 ; 10263511 > s_or_b64 exec, exec, s[6:7] ; 88FE067E > v_mul_f32_e32 v0, v0, v4 ; 10000900 > v_sub_f32_e32 v4, 1.0, v12 ; 080818F2 > v_mad_f32 v4, -v12, v4, v4 ; D2820004 2412090C > v_mul_f32_e32 v4, v4, v4 ; 10080904 > v_add_f32_e32 v17, 2.0, v20 ; 062228F4 > v_mul_f32_e32 v1, v1, v5 ; 10020B01 > v_mad_f32 v4, -v12, v4, v4 ; D2820004 2412090C > v_sub_f32_e32 v5, 1.0, v10 ; 080A14F2 > v_mul_f32_e32 v17, 0x3e000000, v17 ; 102222FF 3E000000 > v_fma_f32 v4, v5, v4, v10 ; D2960004 042A0905 > v_mul_f32_e32 v5, v9, v14 ; 100A1D09 > v_mul_f32_e32 v28, v24, v17 ; 10382318 > v_mul_f32_e32 v17, s3, v0 ; 10220003 > v_mul_f32_e32 v0, s0, v0 ; 10000000 > v_cmp_ne_i32_e32 vcc, 0, v25 ; 7D0A3280 > v_mac_f32_e32 v5, v9, v14 ; 3E0A1D09 > v_cndmask_b32_e32 v5, v5, v9 ; 
000A1305 > v_mul_f32_e32 v0, v22, v0 ; 10000116 > v_mul_f32_e32 v2, v2, v6 ; 10040D02 > v_mul_f32_e32 v6, v9, v15 ; 100C1F09 > v_mul_f32_e32 v0, v0, v5 ; 10000B00 > v_mul_f32_e32 v5, v9, v16 ; 100A2109 > v_mul_f32_e32 v20, s4, v1 ; 10280204 > v_mul_f32_e32 v24, s5, v2 ; 10300405 > v_mul_f32_e32 v1, s1, v1 ; 10020201 > v_mul_f32_e32 v2, s2, v2 ; 10040402 > v_mac_f32_e32 v6, v9, v15 ; 3E0C1F09 > v_mac_f32_e32 v5, v9, v16 ; 3E0A2109 > v_cndmask_b32_e32 v6, v6, v9 ; 000C1306 > v_mul_f32_e32 v1, v22, v1 ; 10020316 > v_cndmask_b32_e32 v5, v5, v9 ; 000A1305 > v_mul_f32_e32 v2, v22, v2 ; 10040516 > v_mul_f32_e32 v25, v14, v17 ; 1032230E > v_mul_f32_e32 v26, v15, v20 ; 1034290F > v_mul_f32_e32 v27, v16, v24 ; 10363110 > v_mul_f32_e32 v4, v4, v28 ; 10083904 > v_mul_f32_e32 v1, v1, v6 ; 10020D01 > v_mul_f32_e32 v5, v2, v5 ; 100A0B02 > v_fma_f32 v2, v0, v4, v25 ; D2960002 04660900 > v_fma_f32 v0, v5, v4, v27 ; D2960000 046E0905 > v_fma_f32 v1, v1, v4, v26 ; D2960001 046A0901 > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_mul_f32_e32 v6, v2, v18 ; 100C2502 > v_mul_f32_e32 v9, v1, v18 ; 10122501 > v_mul_f32_e32 v10, v0, v18 ; 10142500 > v_cndmask_b32_e64 v4, 0, -1, vcc ; D2000004 01A98280 > v_add_f32_e64 v5, 0, v19 clamp ; D2060805 00022680 > v_cndmask_b32_e32 v6, 0, v6 ; 000C0C80 > v_cndmask_b32_e32 v9, 0, v9 ; 00121280 > v_cndmask_b32_e32 v10, 0, v10 ; 00141480 > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > s_and_saveexec_b64 s[12:13], vcc ; BE8C246A > s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E > s_cbranch_execz BB0_10 ; BF880000 > v_add_f32_e32 v12, v25, v25 ; 06183319 > v_max3_f32 v25, v16, v15, v14 ; D2A80019 043A1F10 > v_add_f32_e32 v18, v26, v26 ; 0624351A > v_rcp_f32_e32 v26, v25 ; 7E345519 > v_add_f32_e64 v22, 0, v24 clamp ; D2060816 00023080 > v_mov_b32_e32 v24, 0x3f028283 ; 7E3002FF 3F028283 > v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80 > v_cmp_lt_f32_e64 s[0:1], 0, v15 ; D0020000 00021E80 > v_cmp_lt_f32_e64 s[2:3], 0, v16 ; D0020002 00022080 > v_cmp_gt_f32_e64 s[4:5], v24, v21 ; D0080004 00022B18 > v_cndmask_b32_e64 v21, v14, 1.0, vcc ; D2000015 01A9E50E > v_cndmask_b32_e64 v24, v15, 1.0, s[0:1] ; D2000018 0001E50F > v_cmp_eq_f32_e32 vcc, 0, v25 ; 7C043280 > v_mul_f32_e32 v14, v26, v14 ; 101C1D1A > v_mul_f32_e32 v15, v26, v15 ; 101E1F1A > v_mul_f32_e32 v26, v26, v16 ; 1034211A > v_cndmask_b32_e64 v16, v16, 1.0, s[2:3] ; D2000010 0009E510 > v_bfrev_b32_e32 v25, 14 ; 7E32708E > v_cmp_le_f32_e64 s[0:1], 0, v21 ; D0060000 00022A80 > v_cmp_le_f32_e64 s[2:3], 0, v24 ; D0060002 00023080 > v_cmp_le_f32_e64 s[6:7], 0, v16 ; D0060006 00022080 > v_mul_f32_e32 v21, v25, v21 ; 102A2B19 > v_mul_f32_e32 v24, v25, v24 ; 10303119 > v_mul_f32_e32 v16, v25, v16 ; 10202119 > v_bfrev_b32_e32 v25, 15 ; 7E32708F > v_cndmask_b32_e64 v16, v25, v16, s[6:7] ; D2000010 001A2119 > v_cndmask_b32_e64 v21, v25, v21, s[0:1] ; D2000015 00022B19 > v_cndmask_b32_e64 v24, v25, v24, s[2:3] ; D2000018 000A3119 > v_cndmask_b32_e32 v14, v14, v21 ; 001C2B0E > v_cndmask_b32_e32 v15, v15, v24 ; 001E310F > v_cndmask_b32_e32 v16, v26, v16 ; 0020211A > v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 > v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 > v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 > v_mul_f32_e32 v16, v16, v16 ; 10202110 > v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 > v_mul_f32_e32 v14, v14, v14 ; 101C1D0E > v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 > v_mul_f32_e32 v15, v15, v15 ; 101E1F0F > v_add_f32_e32 v19, v27, v27 ; 0626371B > v_add_f32_e32 v27, -0.5, v11 ; 063616F1 > v_mov_b32_e32 v30, 
0x3e800000 ; 7E3C02FF 3E800000 > v_mul_f32_e32 v14, v14, v17 ; 101C230E > v_min_f32_e32 v17, 0.5, v11 ; 1E2216F0 > v_mul_f32_e32 v15, v15, v20 ; 101E290F > v_mov_b32_e32 v20, 0x3e19999a ; 7E2802FF 3E19999A > v_mul_f32_e32 v16, v16, v22 ; 10202D10 > v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 > v_subrev_f32_e32 v31, v8, v30 ; 0A3E3D08 > v_cndmask_b32_e64 v11, v17, v11, s[4:5] ; D200000B 00121711 > v_mul_f32_e32 v14, v20, v14 ; 101C1D14 > v_mul_f32_e32 v15, v20, v15 ; 101E1F14 > v_mul_f32_e32 v16, v20, v16 ; 10202114 > v_sub_f32_e32 v21, 1.0, v8 ; 082A10F2 > v_mul_f32_e32 v28, v12, v27 ; 1038370C > v_mul_f32_e32 v29, v18, v27 ; 103A3712 > v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 > v_mul_f32_e32 v27, v19, v27 ; 10363713 > v_cndmask_b32_e64 v12, v14, v12, s[4:5] ; D200000C 0012190E > v_cndmask_b32_e64 v14, v15, v18, s[4:5] ; D200000E 0012250F > v_min_f32_e32 v11, 0.5, v11 ; 1E1616F0 > v_cndmask_b32_e64 v15, v16, v19, s[4:5] ; D200000F 00122710 > v_fma_f32 v28, v28, v31, v6 ; D296001C 041A3F1C > v_fma_f32 v29, v29, v31, v9 ; D296001D 04263F1D > v_fma_f32 v27, v27, v31, v10 ; D296001B 042A3F1B > v_add_f32_e32 v8, v30, v8 ; 0610111E > v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 > v_mul_f32_e32 v12, v12, v11 ; 1018170C > v_mul_f32_e32 v14, v14, v11 ; 101C170E > v_mul_f32_e32 v11, v15, v11 ; 1016170F > v_cndmask_b32_e64 v6, v6, v28, s[4:5] ; D2000006 00123906 > v_cndmask_b32_e64 v9, v9, v29, s[4:5] ; D2000009 00123B09 > v_cndmask_b32_e64 v10, v10, v27, s[4:5] ; D200000A 0012370A > v_mul_f32_e32 v12, v12, v21 ; 10182B0C > v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 > v_mul_f32_e32 v14, v14, v21 ; 101C2B0E > v_mul_f32_e32 v11, v11, v21 ; 10162B0B > v_fma_f32 v6, v12, v8, v6 ; D2960006 041A110C > v_fma_f32 v9, v14, v8, v9 ; D2960009 0426110E > v_fma_f32 v10, v11, v8, v10 ; D296000A 042A110B > s_or_b64 exec, exec, s[12:13] ; 88FE0C7E > v_mul_f32_e32 v3, v3, v7 ; 10060F03 > v_mul_f32_e32 v2, v2, v5 ; 10040B02 > v_cmp_ne_i32_e32 vcc, 0, v4 ; 7D0A0880 > v_mul_f32_e32 v1, v1, v5 ; 10020B01 > v_mul_f32_e32 v0, v0, v5 ; 10000B00 > v_mul_f32_e32 v3, s8, v3 ; 10060608 > v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 > v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 > v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 > v_fma_f32 v2, v3, v2, v6 ; D2960002 041A0503 > v_fma_f32 v1, v3, v1, v9 ; D2960001 04260303 > v_fma_f32 v3, v3, v0, v10 ; D2960003 042A0103 > v_add_f32_e32 v0, 0x3eaa7efa, v23 ; 06002EFF 3EAA7EFA > v_add_f32_e64 v4, 0, v0 clamp ; D2060804 00020080 > v_mul_f32_e32 v0, v2, v4 ; 10000902 > v_mul_f32_e32 v2, v3, v4 ; 10040903 > v_mul_f32_e32 v1, v1, v4 ; 10020901 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 36 >Code Size: 1940 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 7 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s12, s[4:7], 
0x54 ; C2060554 > s_buffer_load_dword s11, s[4:7], 0x2f ; C205852F > s_buffer_load_dword s13, s[4:7], 0x55 ; C2068555 > s_buffer_load_dword s14, s[4:7], 0x56 ; C2070556 > s_buffer_load_dword s9, s[4:7], 0x2d ; C204852D > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s12 ; 7E00020C > s_buffer_load_dword s8, s[4:7], 0x2c ; C204052C > v_mov_b32_e32 v1, s13 ; 7E02020D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, s11, v0 ; 100C000B > v_mul_f32_e32 v0, s14, v0 ; 1000000E > s_buffer_load_dword s10, s[4:7], 0x2e ; C205052E > s_buffer_load_dword s36, s[4:7], 0xd9 ; C21205D9 > v_mul_f32_e32 v7, s11, v1 ; 100E020B > v_mul_f32_e32 v1, s14, v1 ; 1002020E > s_buffer_load_dword s32, s[4:7], 0xd5 ; C21005D5 > s_buffer_load_dword s35, s[4:7], 0xd8 ; C21185D8 > s_buffer_load_dword s40, s[4:7], 0xdd ; C21405DD > s_buffer_load_dword s31, s[4:7], 0xd4 ; C20F85D4 > s_buffer_load_dword s37, s[4:7], 0xda ; C21285DA > s_buffer_load_dword s39, s[4:7], 0xdc ; C21385DC > s_buffer_load_dword s33, s[4:7], 0xd6 ; C21085D6 > s_buffer_load_dword s38, s[4:7], 0xdb ; C21305DB > s_buffer_load_dword s41, s[4:7], 0xde ; C21485DE > s_buffer_load_dword s28, s[4:7], 0xc5 ; C20E05C5 > s_buffer_load_dword s34, s[4:7], 0xd7 ; C21105D7 > s_buffer_load_dword s16, s[4:7], 0xb9 ; C20805B9 > s_buffer_load_dword s20, s[4:7], 0xbd ; C20A05BD > s_buffer_load_dword s27, s[4:7], 0xc4 ; C20D85C4 > s_buffer_load_dword s42, s[4:7], 0xdf ; C21505DF > s_buffer_load_dword s15, s[4:7], 0xb8 ; C20785B8 > s_buffer_load_dword s19, s[4:7], 0xbc ; C20985BC > s_buffer_load_dword s24, s[4:7], 0xc1 ; C20C05C1 > s_buffer_load_dword s29, s[4:7], 0xc6 ; C20E85C6 > s_buffer_load_dword s17, s[4:7], 0xba ; C20885BA > s_buffer_load_dword s21, s[4:7], 0xbe ; C20A85BE > s_buffer_load_dword s23, s[4:7], 0xc0 ; C20B85C0 > s_buffer_load_dword s30, s[4:7], 0xc7 ; C20F05C7 > s_buffer_load_dword s18, s[4:7], 0xbb ; C20905BB > s_buffer_load_dword s22, s[4:7], 0xbf ; C20B05BF > s_buffer_load_dword s25, s[4:7], 0xc2 ; C20C85C2 > s_buffer_load_dword s26, s[4:7], 0xc3 ; C20D05C3 > s_buffer_load_dword s43, s[4:7], 0xe4 ; C21585E4 > s_buffer_load_dword s44, s[4:7], 0xe5 ; C21605E5 > s_buffer_load_dword s4, s[4:7], 0xe6 ; C20205E6 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > s_buffer_load_dword s7, s[0:3], 0x56 ; C2038156 > s_buffer_load_dword s45, s[0:3], 0x57 ; C2168157 > s_buffer_load_dword s46, s[0:3], 0x60 ; C2170160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > v_add_f32_e32 v10, 0, v3 ; 06140680 > v_mul_f32_e32 v6, v10, v6 ; 100C0D0A > v_add_f32_e32 v10, 0, v4 ; 06140880 > v_mul_f32_e32 v0, v3, v0 ; 10000103 > v_cmp_gt_f32_e32 vcc, -0.5, v5 ; 7C080AF1 > v_mul_f32_e32 v9, s14, v5 ; 10120A0E > v_cndmask_b32_e32 v0, v6, v0 ; 00000106 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_mul_f32_e32 v7, v10, v7 ; 100E0F0A > v_mad_f32 v8, v5, s11, s11 ; D2820008 002C1705 > v_xor_b32_e32 v6, 0x80000000, v9 ; 3A0C12FF 80000000 > v_cndmask_b32_e32 v1, v7, v1 ; 00020307 > v_cndmask_b32_e32 v6, v8, v6 ; 000C0D08 > v_mul_f32_e32 v4, s9, v4 ; 10080809 > v_cmp_lt_f32_e32 vcc, 0.5, v5 ; 7C020AF0 > v_cndmask_b32_e32 v1, v1, v4 ; 00020901 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s8, v3 ; 10060608 > v_mul_f32_e32 v7, s10, v5 ; 100E0A0A > v_cndmask_b32_e32 v0, v0, v3 ; 00000700 > v_mul_f32_e32 v5, s36, v1 ; 100A0224 > v_mul_f32_e32 v4, s32, v1 ; 10080220 > v_mul_f32_e32 v1, s40, v1 ; 10020228 > v_cndmask_b32_e32 v3, v6, v7 ; 00060F06 > v_mac_f32_e32 v5, s35, v0 ; 3E0A0023 > v_mac_f32_e32 v4, s31, v0 ; 3E08001F > 
v_mac_f32_e32 v1, s39, v0 ; 3E020027 > v_mac_f32_e32 v5, s37, v3 ; 3E0A0625 > v_mac_f32_e32 v1, s41, v3 ; 3E020629 > v_mac_f32_e32 v4, s33, v3 ; 3E080621 > v_add_f32_e32 v3, s38, v5 ; 06060A26 > v_add_f32_e32 v0, s34, v4 ; 06000822 > v_mul_f32_e32 v7, s28, v3 ; 100E061C > v_mul_f32_e32 v4, s16, v3 ; 10080610 > v_mul_f32_e32 v5, s20, v3 ; 100A0614 > v_add_f32_e32 v1, s42, v1 ; 0602022A > v_mac_f32_e32 v7, s27, v0 ; 3E0E001B > v_mul_f32_e32 v6, s24, v3 ; 100C0618 > v_mac_f32_e32 v4, s15, v0 ; 3E08000F > v_mac_f32_e32 v5, s19, v0 ; 3E0A0013 > v_mac_f32_e32 v7, s29, v1 ; 3E0E021D > v_add_f32_e32 v7, s30, v7 ; 060E0E1E > v_mac_f32_e32 v6, s23, v0 ; 3E0C0017 > v_mac_f32_e32 v4, s17, v1 ; 3E080211 > v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 > v_mac_f32_e32 v6, s25, v1 ; 3E0C0219 > v_add_f32_e32 v4, s18, v4 ; 06080812 > v_mul_f32_e32 v10, s46, v7 ; 10140E2E > v_add_f32_e32 v5, s22, v5 ; 060A0A16 > v_mul_f32_e32 v11, s0, v7 ; 10160E00 > v_add_f32_e32 v6, s26, v6 ; 060C0C1A > v_mul_f32_e32 v8, s5, v7 ; 10100E05 > v_fma_f32 v10, v4, s46, v10 ; D296000A 04285D04 > v_fma_f32 v11, v5, -s0, v11 ; D296000B 442C0105 > v_mul_f32_e32 v9, s6, v7 ; 10120E06 > v_fma_f32 v8, v10, s7, v8 ; D2960008 04200F0A > v_fma_f32 v9, v11, s45, v9 ; D2960009 04245B0B > exp 15, 32, 0, 0, 0, v10, v11, v6, v7 ; F800020F 07060B0A > v_sub_f32_e32 v0, s43, v0 ; 0800002B > v_sub_f32_e32 v3, s44, v3 ; 0806062C > v_sub_f32_e32 v1, s4, v1 ; 08020204 > exp 15, 33, 0, 0, 0, v8, v9, v6, v7 ; F800021F 07060908 > exp 15, 34, 0, 0, 0, v0, v3, v1, v0 ; F800022F 00010300 > exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 12 >Code Size: 544 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v6, v2, 1, 1, [m0] ; C8180502 > v_interp_p2_f32 v6, [v6], v3, 1, 1, [m0] ; C8190503 > v_interp_p1_f32 v9, v2, 0, 2, [m0] ; C8240802 > v_interp_p2_f32 v9, [v9], v3, 0, 2, [m0] ; C8250803 > v_interp_p1_f32 v10, v2, 1, 2, [m0] ; C8280902 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_interp_p2_f32 v10, [v10], v3, 1, 2, [m0] ; C8290903 > v_interp_p1_f32 v11, v2, 2, 2, [m0] ; C82C0A02 > v_cndmask_b32_e64 v8, v0, 1.0, vcc ; D2000008 01A9E500 > v_bfrev_b32_e32 v2, 14 ; 7E04708E > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_cndmask_b32_e64 v12, v1, 1.0, vcc ; D200000C 01A9E501 > v_bfrev_b32_e32 v7, 15 ; 7E0E708F > v_mul_f32_e32 v14, v2, v8 ; 101C1102 > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cndmask_b32_e32 v8, v7, v14 ; 00101D07 > v_mul_f32_e32 v14, v2, v12 ; 101C1902 > v_cmp_le_f32_e32 vcc, 0, v12 ; 7C061880 > v_cndmask_b32_e32 v12, v7, v14 ; 00181D07 > v_rcp_f32_e32 v14, v4 ; 7E1C5504 > s_load_dwordx4 s[12:15], s[2:3], 0x8 ; C0860308 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > v_mul_f32_e32 v1, v14, v1 ; 1002030E > v_cndmask_b32_e32 v23, v1, v12 ; 002E1901 > v_cndmask_b32_e64 v1, v5, 1.0, s[0:1] ; D2000001 0001E505 > 
s_load_dwordx8 s[20:27], s[4:5], 0x0 ; C0CA0500 > s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C > v_cmp_lt_f32_e64 s[0:1], 0, v6 ; D0020000 00020C80 > v_interp_p2_f32 v11, [v11], v3, 2, 2, [m0] ; C82D0A03 > v_cndmask_b32_e64 v3, v6, 1.0, s[0:1] ; D2000003 0001E506 > v_mul_f32_e32 v4, v2, v1 ; 10080302 > v_cmp_le_f32_e64 s[0:1], 0, v1 ; D0060000 00020280 > v_cndmask_b32_e64 v1, v7, v4, s[0:1] ; D2000001 00020907 > v_mul_f32_e32 v4, v2, v3 ; 10080702 > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_cndmask_b32_e64 v3, v7, v4, s[0:1] ; D2000003 00020907 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s0, s[12:15], 0x1d ; C2000D1D > v_mul_f32_e32 v0, v14, v0 ; 1000010E > s_buffer_load_dword s1, s[12:15], 0x1c ; C2008D1C > s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 > v_mul_f32_e32 v4, v14, v5 ; 10080B0E > v_mul_f32_e32 v5, v14, v6 ; 100A0D0E > s_and_b32 s36, s36, s27 ; 87241B24 > v_cndmask_b32_e32 v22, v0, v8 ; 002C1100 > s_buffer_load_dword s2, s[12:15], 0x1e ; C2010D1E > image_sample v0, v[22:23], s[20:27], s[36:39] dmask:0x1 ; F0800100 01250016 > v_cndmask_b32_e32 v14, v5, v3 ; 001C0705 > v_cndmask_b32_e32 v12, v4, v1 ; 00180304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v14 ; 10021D00 > s_buffer_load_dword s8, s[12:15], 0x1f ; C2040D1F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s0, v1 ; 10060200 > v_mul_f32_e32 v4, v0, v12 ; 10081900 > s_buffer_load_dword s0, s[12:15], 0x21 ; C2000D21 > v_mac_f32_e32 v3, s1, v4 ; 3E060801 > s_buffer_load_dword s1, s[12:15], 0x20 ; C2008D20 > v_mac_f32_e32 v3, s2, v0 ; 3E060002 > s_buffer_load_dword s2, s[12:15], 0x22 ; C2010D22 > v_add_f32_e32 v3, s8, v3 ; 06060608 > s_buffer_load_dword s8, s[12:15], 0x23 ; C2040D23 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v6, s0, v1 ; 100C0200 > v_mac_f32_e32 v6, s1, v4 ; 3E0C0801 > v_mac_f32_e32 v6, s2, v0 ; 3E0C0002 > s_buffer_load_dword s2, s[12:15], 0x29 ; C2010D29 > s_buffer_load_dword s9, s[12:15], 0x28 ; C2048D28 > v_add_f32_e32 v6, s8, v6 ; 060C0C08 > s_buffer_load_dword s8, s[12:15], 0x2a ; C2040D2A > s_buffer_load_dword s11, s[12:15], 0x2b ; C2058D2B > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v1, s2, v1 ; 10020202 > v_mac_f32_e32 v1, s9, v4 ; 3E020809 > v_cmp_lt_f32_e32 vcc, 0, v3 ; 7C020680 > v_mac_f32_e32 v1, s8, v0 ; 3E020008 > v_add_f32_e32 v1, s11, v1 ; 0602020B > s_load_dwordx8 s[28:35], s[4:5], 0x10 ; C0CE0510 > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > v_rcp_f32_e32 v4, v1 ; 7E085501 > v_cndmask_b32_e64 v5, v3, 1.0, vcc ; D2000005 01A9E503 > v_cmp_lt_f32_e32 vcc, 0, v6 ; 7C020C80 > v_cndmask_b32_e64 v8, v6, 1.0, vcc ; D2000008 01A9E506 > v_mul_f32_e32 v15, v2, v5 ; 101E0B02 > v_cmp_le_f32_e32 vcc, 0, v5 ; 7C060A80 > v_cndmask_b32_e32 v5, v7, v15 ; 000A1F07 > v_mul_f32_e32 v2, v2, v8 ; 10041102 > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_cndmask_b32_e32 v2, v7, v2 ; 00040507 > v_mul_f32_e32 v3, v4, v3 ; 10060704 > v_cmp_eq_f32_e32 vcc, 0, v1 ; 7C040280 > v_mul_f32_e32 v4, v4, v6 ; 10080D04 > v_cndmask_b32_e32 v24, v3, v5 ; 00300B03 > v_cndmask_b32_e32 v25, v4, v2 ; 00320504 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s20, s20, s35 ; 87142314 > v_mov_b32_e32 v26, 0 ; 7E340280 > image_sample_l v[18:21], v[24:27], s[28:35], s[20:23] dmask:0xf ; F0900F00 00A71218 > s_buffer_load_dword s3, s[16:19], 0x10 ; C2019110 > s_buffer_load_dword s6, s[16:19], 0x11 ; C2031111 > s_buffer_load_dword s7, s[16:19], 0x12 ; C2039112 > s_buffer_load_dword s8, s[16:19], 0x14 ; C2041114 > s_buffer_load_dword s9, s[16:19], 0x15 ; C2049115 > 
s_buffer_load_dword s11, s[16:19], 0x16 ; C2059116 > s_buffer_load_dword s28, s[16:19], 0x18 ; C20E1118 > s_buffer_load_dword s29, s[16:19], 0x19 ; C20E9119 > s_buffer_load_dword s16, s[16:19], 0x1a ; C208111A > s_buffer_load_dword s17, s[12:15], 0x44 ; C2088D44 > s_buffer_load_dword s18, s[12:15], 0x45 ; C2090D45 > s_buffer_load_dword s19, s[12:15], 0x0 ; C2098D00 > s_buffer_load_dword s30, s[12:15], 0x46 ; C20F0D46 > s_load_dwordx8 s[36:43], s[4:5], 0x20 ; C0D20520 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mad_f32 v12, -v12, v0, s17 ; D282000C 2046010C > v_mad_f32 v14, -v14, v0, s18 ; D282000E 204A010E > v_mul_f32_e32 v12, s19, v12 ; 10181813 > v_sub_f32_e32 v0, s30, v0 ; 0800001E > v_mul_f32_e32 v15, s3, v12 ; 101E1803 > v_mul_f32_e32 v14, s19, v14 ; 101C1C13 > v_mul_f32_e32 v16, s8, v12 ; 10201808 > v_mul_f32_e32 v12, s28, v12 ; 1018181C > v_mac_f32_e32 v15, s6, v14 ; 3E1E1C06 > v_mul_f32_e32 v0, s19, v0 ; 10000013 > v_mac_f32_e32 v12, s29, v14 ; 3E181C1D > v_mac_f32_e32 v16, s9, v14 ; 3E201C09 > v_mac_f32_e32 v15, s7, v0 ; 3E1E0007 > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > v_mac_f32_e32 v12, s16, v0 ; 3E180010 > v_mac_f32_e32 v16, s11, v0 ; 3E20000B > v_mul_f32_e32 v0, v15, v15 ; 10001F0F > s_load_dwordx4 s[44:47], s[4:5], 0x3c ; C096053C > s_load_dwordx8 s[48:55], s[4:5], 0x30 ; C0D80530 > v_mac_f32_e32 v0, v16, v16 ; 3E002110 > v_mul_f32_e32 v14, v9, v9 ; 101C1309 > v_mac_f32_e32 v14, v10, v10 ; 3E1C150A > v_mac_f32_e32 v0, v12, v12 ; 3E00190C > v_rsq_clamp_f32_e32 v17, v0 ; 7E225900 > v_mac_f32_e32 v14, v11, v11 ; 3E1C170B > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s43 ; 87182B18 > s_and_b32 s44, s44, s55 ; 872C372C > image_sample v[1:4], v[22:23], s[36:43], s[24:27] dmask:0xf ; F0800F00 00C90116 > image_sample v[5:8], v[22:23], s[48:55], s[44:47] dmask:0xf ; F0800F00 016C0516 > v_mul_f32_e32 v24, v15, v17 ; 1030230F > v_mul_f32_e32 v25, v16, v17 ; 10322310 > v_mul_f32_e32 v12, v12, v17 ; 1018230C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v5, v5, 2.0, -1.0 ; D2960005 03CDE905 > v_fma_f32 v9, v9, v14, v24 ; D2960009 04621D09 > v_fma_f32 v10, v10, v14, v25 ; D296000A 04661D0A > v_fma_f32 v11, v11, v14, v12 ; D296000B 04321D0B > v_fma_f32 v6, v6, 2.0, -1.0 ; D2960006 03CDE906 > v_mul_f32_e32 v14, v5, v5 ; 101C0B05 > v_mac_f32_e32 v14, v6, v6 ; 3E1C0D06 > v_fma_f32 v7, v7, 2.0, -1.0 ; D2960007 03CDE907 > v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 > v_rsq_clamp_f32_e32 v27, v14 ; 7E36590E > v_mul_f32_e32 v14, v9, v9 ; 101C1309 > v_mac_f32_e32 v14, v10, v10 ; 3E1C150A > v_mac_f32_e32 v14, v11, v11 ; 3E1C170B > v_rsq_clamp_f32_e32 v28, v14 ; 7E38590E > s_load_dwordx8 s[20:27], s[4:5], 0x40 ; C0CA0540 > s_load_dwordx4 s[16:19], s[4:5], 0x4c ; C088054C > v_mul_f32_e32 v5, v5, v27 ; 100A3705 > v_mul_f32_e32 v9, v9, v28 ; 10123909 > v_mul_f32_e32 v29, v9, v5 ; 103A0B09 > v_mul_f32_e32 v6, v6, v27 ; 100C3706 > v_mul_f32_e32 v5, v24, v5 ; 100A0B18 > v_mul_f32_e32 v10, v10, v28 ; 1014390A > v_mac_f32_e32 v29, v10, v6 ; 3E3A0D0A > v_mac_f32_e32 v5, v25, v6 ; 3E0A0D19 > v_mul_f32_e32 v6, v7, v27 ; 100C3707 > v_mul_f32_e32 v7, v11, v28 ; 100E390B > s_load_dwordx8 s[28:35], s[4:5], 0x50 ; C0CE0550 > s_load_dwordx4 s[4:7], s[4:5], 0x5c ; C082055C > v_mac_f32_e32 v29, v7, v6 ; 3E3A0D07 > v_mac_f32_e32 v5, v12, v6 ; 3E0A0D0C > v_add_f32_e64 v6, 0, v29 clamp ; D2060806 00023A80 > v_mul_f32_e32 v30, v9, v24 ; 103C3109 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s16, s16, s27 ; 87101B10 > v_log_f32_e32 v6, v6 ; 7E0C4F06 > v_mac_f32_e32 v30, v10, 
v25 ; 3E3C330A > image_sample v[14:17], v[22:23], s[20:27], s[16:19] dmask:0xf ; F0800F00 00850E16 > v_mac_f32_e32 v30, v7, v12 ; 3E3C1907 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v7, v14, v14 ; 100E1D0E > v_mov_b32_e32 v9, 0x3b83126f ; 7E1202FF 3B83126F > v_mov_b32_e32 v10, 0x45800000 ; 7E1402FF 45800000 > v_fma_f32 v9, v7, v10, v9 ; D2960009 04261507 > s_and_b32 s4, s4, s35 ; 87042304 > v_mov_b32_e32 v24, v26 ; 7E30031A > v_mul_f32_e32 v6, v9, v6 ; 100C0D09 > image_sample_l v[9:12], v[22:25], s[28:35], s[4:7] dmask:0xf ; F0900F00 00270916 > s_buffer_load_dword s1, s[12:15], 0x40 ; C2008D40 > s_buffer_load_dword s0, s[12:15], 0x41 ; C2000D41 > s_buffer_load_dword s2, s[12:15], 0x48 ; C2010D48 > s_buffer_load_dword s5, s[12:15], 0x42 ; C2028D42 > s_buffer_load_dword s4, s[12:15], 0x49 ; C2020D49 > s_buffer_load_dword s3, s[12:15], 0x4a ; C2018D4A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v11, v5, v4 ; 06160905 > v_exp_f32_e32 v22, v6 ; 7E2C4B06 > v_add_f32_e64 v6, 0, v30 clamp ; D2060806 00023C80 > v_add_f32_e32 v11, -1.0, v11 ; 061616F3 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_saveexec_b64 s[6:7], vcc ; BE86246A > s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > v_cndmask_b32_e64 v12, v11, 1.0, vcc ; D200000C 01A9E50B > v_cmp_le_f32_e32 vcc, 0, v12 ; 7C061880 > v_mul_f32_e32 v12, 0x70000000, v12 ; 101818FF 70000000 > v_bfrev_b32_e32 v14, 15 ; 7E1C708F > v_cndmask_b32_e32 v12, v14, v12 ; 0018190E > s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 > s_xor_b64 exec, exec, s[6:7] ; 89FE067E > v_rcp_f32_e32 v4, v4 ; 7E085504 > v_mul_f32_e32 v12, v4, v11 ; 10181704 > s_or_b64 exec, exec, s[6:7] ; 88FE067E > v_add_f32_e64 v23, 0, v12 clamp ; D2060817 00021880 > v_mul_f32_e32 v11, s1, v18 ; 10162401 > v_mul_f32_e32 v12, s0, v19 ; 10182600 > v_mov_b32_e32 v18, 0x40004189 ; 7E2402FF 40004189 > v_mov_b32_e32 v19, 0x45800000 ; 7E2602FF 45800000 > v_fma_f32 v7, v7, v19, v18 ; D2960007 044A2707 > v_sub_f32_e32 v19, 1.0, v6 ; 08260CF2 > v_mad_f32 v19, -v6, v19, v19 ; D2820013 244E2706 > v_mul_f32_e32 v19, v19, v19 ; 10262713 > v_mul_f32_e32 v7, 0x3e000000, v7 ; 100E0EFF 3E000000 > v_mad_f32 v6, -v6, v19, v19 ; D2820006 244E2706 > v_sub_f32_e32 v19, 1.0, v16 ; 082620F2 > v_mul_f32_e32 v4, s2, v21 ; 10082A02 > v_mul_f32_e32 v14, s5, v20 ; 101C2805 > v_fma_f32 v6, v19, v6, v16 ; D2960006 04420D13 > v_mul_f32_e32 v20, s4, v21 ; 10282A04 > v_mul_f32_e32 v7, v22, v7 ; 100E0F16 > v_mul_f32_e32 v21, s3, v21 ; 102A2A03 > v_mul_f32_e32 v4, v9, v4 ; 10080909 > v_mul_f32_e32 v6, v6, v7 ; 100C0F06 > v_mul_f32_e32 v7, v9, v20 ; 100E2909 > v_mul_f32_e32 v9, v9, v21 ; 10122B09 > v_mul_f32_e32 v18, v1, v11 ; 10241701 > v_mul_f32_e32 v4, v4, v15 ; 10081F04 > v_mul_f32_e32 v16, v2, v12 ; 10201902 > v_mul_f32_e32 v7, v7, v15 ; 100E1F07 > v_mul_f32_e32 v19, v3, v14 ; 10261D03 > v_mul_f32_e32 v9, v9, v15 ; 10121F09 > v_fma_f32 v4, v4, v6, v18 ; D2960004 044A0D04 > v_fma_f32 v7, v7, v6, v16 ; D2960007 04420D07 > v_fma_f32 v9, v9, v6, v19 ; D2960009 044E0D09 > v_mul_f32_e32 v6, v7, v23 ; 100C2F07 > v_mul_f32_e32 v4, v4, v23 ; 10082F04 > v_mul_f32_e32 v7, v9, v23 ; 100E2F09 > v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 > s_and_saveexec_b64 s[8:9], vcc ; BE88246A > s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E > s_cbranch_execz BB0_6 ; BF880000 > v_add_f32_e32 v15, v16, v16 ; 061E2110 > v_add_f32_e32 v16, v19, v19 ; 06202713 > v_max3_f32 v19, v3, v2, v1 ; D2A80013 04060503 > v_rcp_f32_e32 v20, v19 ; 7E285513 > v_add_f32_e32 v9, v18, v18 ; 06122512 > 
v_mov_b32_e32 v18, 0x3f028283 ; 7E2402FF 3F028283 > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_cmp_lt_f32_e64 s[0:1], 0, v2 ; D0020000 00020480 > v_cmp_lt_f32_e64 s[2:3], 0, v3 ; D0020002 00020680 > v_cmp_gt_f32_e64 s[4:5], v18, v8 ; D0080004 00021112 > v_cndmask_b32_e64 v8, v1, 1.0, vcc ; D2000008 01A9E501 > v_cndmask_b32_e64 v18, v2, 1.0, s[0:1] ; D2000012 0001E502 > v_cmp_eq_f32_e32 vcc, 0, v19 ; 7C042680 > v_mul_f32_e32 v1, v20, v1 ; 10020314 > v_mul_f32_e32 v2, v20, v2 ; 10040514 > v_mul_f32_e32 v20, v20, v3 ; 10280714 > v_cndmask_b32_e64 v3, v3, 1.0, s[2:3] ; D2000003 0009E503 > v_bfrev_b32_e32 v19, 14 ; 7E26708E > v_cmp_le_f32_e64 s[0:1], 0, v8 ; D0060000 00021080 > v_cmp_le_f32_e64 s[2:3], 0, v18 ; D0060002 00022480 > v_cmp_le_f32_e64 s[6:7], 0, v3 ; D0060006 00020680 > v_mul_f32_e32 v8, v19, v8 ; 10101113 > v_mul_f32_e32 v18, v19, v18 ; 10242513 > v_mul_f32_e32 v3, v19, v3 ; 10060713 > v_bfrev_b32_e32 v19, 15 ; 7E26708F > v_cndmask_b32_e64 v3, v19, v3, s[6:7] ; D2000003 001A0713 > v_cndmask_b32_e64 v8, v19, v8, s[0:1] ; D2000008 00021113 > v_cndmask_b32_e64 v18, v19, v18, s[2:3] ; D2000012 000A2513 > v_cndmask_b32_e32 v1, v1, v8 ; 00021101 > v_cndmask_b32_e32 v2, v2, v18 ; 00042502 > v_cndmask_b32_e32 v3, v20, v3 ; 00060714 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 > v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 > v_mul_f32_e32 v1, v1, v1 ; 10020301 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_mul_f32_e32 v2, v2, v2 ; 10040502 > v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 > v_mul_f32_e32 v3, v3, v3 ; 10060703 > v_add_f32_e32 v21, -0.5, v17 ; 062A22F1 > v_mov_b32_e32 v24, 0x3e800000 ; 7E3002FF 3E800000 > v_mul_f32_e32 v1, v1, v11 ; 10021701 > v_min_f32_e32 v11, 0.5, v17 ; 1E1622F0 > v_mul_f32_e32 v2, v2, v12 ; 10041902 > v_mov_b32_e32 v12, 0x3e19999a ; 7E1802FF 3E19999A > v_mul_f32_e32 v3, v3, v14 ; 10061D03 > v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 > v_subrev_f32_e32 v25, v5, v24 ; 0A323105 > v_mul_f32_e32 v1, v12, v1 ; 1002030C > v_mul_f32_e32 v2, v12, v2 ; 1004050C > v_mul_f32_e32 v3, v12, v3 ; 1006070C > v_cndmask_b32_e64 v11, v11, v17, s[4:5] ; D200000B 0012230B > v_sub_f32_e32 v8, 1.0, v5 ; 08100AF2 > v_mul_f32_e32 v22, v9, v21 ; 102C2B09 > v_mul_f32_e32 v23, v15, v21 ; 102E2B0F > v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 > v_mul_f32_e32 v21, v16, v21 ; 102A2B10 > v_cndmask_b32_e64 v1, v1, v9, s[4:5] ; D2000001 00121301 > v_min_f32_e32 v9, 0.5, v11 ; 1E1216F0 > v_cndmask_b32_e64 v2, v2, v15, s[4:5] ; D2000002 00121F02 > v_cndmask_b32_e64 v3, v3, v16, s[4:5] ; D2000003 00122103 > v_fma_f32 v22, v22, v25, v4 ; D2960016 04123316 > v_fma_f32 v23, v23, v25, v6 ; D2960017 041A3317 > v_fma_f32 v21, v21, v25, v7 ; D2960015 041E3315 > v_add_f32_e32 v5, v24, v5 ; 060A0B18 > v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 > v_mul_f32_e32 v1, v1, v9 ; 10021301 > v_mul_f32_e32 v2, v2, v9 ; 10041302 > v_mul_f32_e32 v3, v3, v9 ; 10061303 > v_cndmask_b32_e64 v4, v4, v22, s[4:5] ; D2000004 00122D04 > v_cndmask_b32_e64 v6, v6, v23, s[4:5] ; D2000006 00122F06 > v_cndmask_b32_e64 v7, v7, v21, s[4:5] ; D2000007 00122B07 > v_mul_f32_e32 v1, v1, v8 ; 10021101 > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > v_mul_f32_e32 v2, v2, v8 ; 10041102 > v_mul_f32_e32 v3, v3, v8 ; 10061103 > v_fma_f32 v4, v1, v5, v4 ; D2960004 04120B01 > v_fma_f32 v6, v2, v5, v6 ; D2960006 041A0B02 > v_fma_f32 v7, v3, v5, v7 ; D2960007 041E0B03 > s_or_b64 
exec, exec, s[8:9] ; 88FE087E > v_min_f32_e32 v0, 1.0, v0 ; 1E0000F2 > v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 > v_mad_f32 v0, -v0, v1, v1 ; D2820000 24060300 > v_mul_f32_e32 v1, v4, v0 ; 10020104 > v_mul_f32_e32 v2, v6, v0 ; 10040106 > v_mul_f32_e32 v3, v7, v0 ; 10060107 > v_add_f32_e32 v0, 0x3eaa7efa, v10 ; 060014FF 3EAA7EFA > v_add_f32_e64 v4, 0, v0 clamp ; D2060804 00020080 > v_mul_f32_e32 v0, v1, v4 ; 10000901 > v_mul_f32_e32 v1, v2, v4 ; 10020902 > v_mul_f32_e32 v2, v3, v4 ; 10040903 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 64 >VGPRS: 32 >Code Size: 1816 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 8 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[9:12], v5, s[8:11], 0 idxen ; E00C2000 80020905 > s_buffer_load_dword s10, s[0:3], 0x45 ; C2050145 > s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 > s_buffer_load_dword s18, s[0:3], 0x4d ; C209014D > s_buffer_load_dword s22, s[0:3], 0x51 ; C20B0151 > s_buffer_load_dword s7, s[0:3], 0x40 ; C2038140 > s_buffer_load_dword s9, s[0:3], 0x44 ; C2048144 > s_buffer_load_dword s13, s[0:3], 0x48 ; C2068148 > s_buffer_load_dword s17, s[0:3], 0x4c ; C208814C > s_buffer_load_dword s21, s[0:3], 0x50 ; C20A8150 > s_buffer_load_dword s4, s[0:3], 0x3d ; C202013D > s_buffer_load_dword s5, s[0:3], 0x3e ; C202813E > s_buffer_load_dword s6, s[0:3], 0x3f ; C203013F > s_buffer_load_dword s8, s[0:3], 0x41 ; C2040141 > s_buffer_load_dword s11, s[0:3], 0x46 ; C2058146 > s_buffer_load_dword s12, s[0:3], 0x47 ; C2060147 > s_buffer_load_dword s15, s[0:3], 0x4a ; C207814A > s_buffer_load_dword s16, s[0:3], 0x4b ; C208014B > s_buffer_load_dword s19, s[0:3], 0x4e ; C209814E > s_buffer_load_dword s20, s[0:3], 0x4f ; C20A014F > s_buffer_load_dword s23, s[0:3], 0x52 ; C20B8152 > s_buffer_load_dword s24, s[0:3], 0x53 ; C20C0153 > s_buffer_load_dword s25, s[0:3], 0x54 ; C20C8154 > s_buffer_load_dword s26, s[0:3], 0x55 ; C20D0155 > s_buffer_load_dword s27, s[0:3], 0x56 ; C20D8156 > s_buffer_load_dword s0, s[0:3], 0x57 ; C2000157 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s7 ; 7E000207 > v_mul_f32_e32 v1, s4, v0 ; 10020004 > v_mul_f32_e32 v3, s10, v7 ; 10060E0A > v_mul_f32_e32 v4, s14, v7 ; 10080E0E > v_mul_f32_e32 v5, s18, v7 ; 100A0E12 > v_mul_f32_e32 v7, s22, v7 ; 100E0E16 > v_mac_f32_e32 v3, s9, v6 ; 3E060C09 > v_mac_f32_e32 v4, s13, v6 ; 3E080C0D > v_mac_f32_e32 v5, s17, v6 ; 3E0A0C11 > v_mac_f32_e32 v7, s21, v6 ; 3E0E0C15 > v_mac_f32_e32 v3, s11, v8 ; 3E06100B > v_mac_f32_e32 v4, s15, v8 ; 3E08100F > v_mac_f32_e32 v5, s19, v8 ; 3E0A1013 > v_mac_f32_e32 v7, s23, v8 ; 3E0E1017 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, s25, v9 ; 10101219 > v_mul_f32_e32 v6, s5, v0 ; 100C0005 > v_mul_f32_e32 v9, s26, v10 ; 1012141A > v_mul_f32_e32 v10, s27, v11 ; 1014161B > v_mul_f32_e32 v0, s6, v0 ; 10000006 > 
v_mul_f32_e32 v11, s0, v12 ; 10161800 > v_mul_f32_e32 v11, s8, v11 ; 10161608 > v_mul_f32_e32 v1, v8, v1 ; 10020308 > v_mul_f32_e32 v6, v9, v6 ; 100C0D09 > v_mul_f32_e32 v0, v10, v0 ; 1000010A > v_add_f32_e32 v3, s12, v3 ; 0606060C > v_add_f32_e32 v4, s16, v4 ; 06080810 > v_add_f32_e32 v5, s20, v5 ; 060A0A14 > v_add_f32_e32 v7, s24, v7 ; 060E0E18 > exp 15, 32, 0, 0, 0, v1, v6, v0, v11 ; F800020F 0B000601 > exp 15, 12, 0, 1, 0, v3, v4, v5, v7 ; F80008CF 07050403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 16 >Code Size: 288 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 388 >Fragment Shader Epilog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { >main_body: > %22 = call i32 @llvm.SI.packf16(float %6, float %7) > %23 = bitcast i32 %22 to float > %24 = call i32 @llvm.SI.packf16(float %8, float %9) > %25 = bitcast i32 %24 to float > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %23, float %25, float undef, float undef) > %26 = call i32 @llvm.SI.packf16(float %10, float %11) > %27 = bitcast i32 %26 to float > %28 = call i32 @llvm.SI.packf16(float %12, float %13) > %29 = bitcast i32 %28 to float > call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %27, float %29, float undef, float undef) > ret void >} > >; Function Attrs: nounwind readnone >declare i32 @llvm.SI.packf16(float, float) #1 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { "InitialPSInputAddr"="16777215" } >attributes #1 = { nounwind readnone } > > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v17, v2, 2, 0, [m0] ; C8440202 > v_interp_p2_f32 v17, [v17], v3, 2, 0, [m0] ; C8450203 > v_interp_p1_f32 v18, v2, 3, 0, [m0] ; C8480302 > v_interp_p2_f32 v18, [v18], v3, 3, 0, [m0] ; C8490303 > v_mov_b32_e32 v16, v13 ; 7E20030D > v_mov_b32_e32 v4, 0 ; 7E080280 > v_mov_b32_e32 v5, 0 ; 7E0A0280 > v_mov_b32_e32 v6, 0 ; 7E0C0280 > v_mov_b32_e32 v7, 0 ; 7E0E0280 > v_mov_b32_e32 v8, 0 ; 7E100280 > v_mov_b32_e32 v9, 0 ; 7E120280 > v_mov_b32_e32 v10, 0 ; 7E140280 > v_mov_b32_e32 v11, 0 ; 7E160280 > v_mov_b32_e32 v12, 0 ; 7E180280 > v_mov_b32_e32 v13, 0 ; 7E1A0280 > v_mov_b32_e32 v14, 0 ; 7E1C0280 > v_mov_b32_e32 v15, 0 ; 7E1E0280 > v_mov_b32_e32 v2, v17 ; 7E040311 > v_mov_b32_e32 v3, v18 ; 7E060312 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 1, 1, v0, v1, v0, v0 ; F8001C1F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 20 >Code Size: 136 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as 
VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 12, 0, 1, 0, v3, v4, v5, v6 ; F80008CF 06050403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 40 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > v_cvt_u32_f32_e32 v0, v12 ; 7E000F0C > v_mov_b32_e32 v2, 0 ; 7E040280 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 > s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v1, s4 ; 7E020204 > v_mac_f32_e32 v1, s0, v13 ; 3E021A00 > v_cvt_u32_f32_e32 v1, v1 ; 7E020F01 > v_mov_b32_e32 v13, v15 ; 7E1A030F > image_load_mip v[0:3], v[0:3], s[12:19] dmask:0xf ; F0040F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd377 >SPI_PS_INPUT_ENA = 0x0320 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 17 >Code Size: 80 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[4:7], v4, s[4:7], 0 idxen ; E00C2000 80010404 > s_buffer_load_dword s4, s[8:11], 0x2c ; C202092C > s_buffer_load_dword s5, s[8:11], 0x2d ; C202892D > s_buffer_load_dword s6, s[8:11], 0x2e ; C203092E > s_buffer_load_dword s9, s[0:3], 0x59 ; C2048159 > s_buffer_load_dword s8, s[0:3], 0x58 ; C2040158 > s_buffer_load_dword s10, s[0:3], 0x60 ; C2050160 > s_buffer_load_dword s11, s[0:3], 0x61 ; C2058161 > s_buffer_load_dword s13, s[0:3], 0x55 ; C2068155 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mov_b32_e32 v7, s9 ; 7E0E0209 > s_buffer_load_dword s9, s[0:3], 0x54 ; C2048154 > s_buffer_load_dword s15, s[0:3], 0x65 ; C2078165 > s_buffer_load_dword s14, s[0:3], 0x56 ; C2070156 > s_buffer_load_dword s7, s[0:3], 0x57 ; C2038157 > v_mov_b32_e32 v6, s8 ; 7E0C0208 > v_mov_b32_e32 v9, s13 ; 7E12020D > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v7, s15, v6 ; 3E0E0C0F > v_mov_b32_e32 v6, s15 ; 7E0C020F > s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A > s_buffer_load_dword s8, s[0:3], 0x1b ; C204011B > s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 > v_mov_b32_e32 v8, s9 ; 7E100209 > s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 > s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112 > s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 > s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114 > s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115 > s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116 > s_buffer_load_dword s21, s[0:3], 0x17 ; C20A8117 > s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118 > 
s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > v_bfrev_b32_e32 v3, 1 ; 7E067081 > v_fma_f32 v11, v4, s10, s10 ; D296000B 00281504 > v_fma_f32 v12, v5, -s11, s11 ; D296000C 402C1705 > exp 15, 32, 0, 0, 0, v11, v12, v1, v0 ; F800020F 00010C0B > v_fma_f32 v8, s14, v11, v8 ; D2960008 0422160E > s_waitcnt expcnt(0) ; BF8C0F0F > v_add_f32_e32 v11, 1.0, v5 ; 06160AF2 > v_mad_f32 v10, v4, s14, s14 ; D282000A 00381D04 > v_fma_f32 v11, -v11, 0.5, 1.0 ; D296000B 23C9E10B > v_fma_f32 v10, v10, 0.5, s9 ; D296000A 0025E10A > v_fma_f32 v12, s7, v12, v9 ; D296000C 04261807 > v_fma_f32 v9, s7, v11, v9 ; D2960009 04261607 > v_mul_f32_e32 v10, v7, v10 ; 10141507 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v9, s17, v7 ; 10120E11 > v_mul_f32_e32 v11, s19, v7 ; 10160E13 > v_mul_f32_e32 v7, s0, v7 ; 100E0E00 > v_mac_f32_e32 v9, s16, v10 ; 3E121410 > v_mac_f32_e32 v11, s15, v10 ; 3E16140F > v_mac_f32_e32 v7, s22, v10 ; 3E0E1416 > v_mac_f32_e32 v7, s12, v6 ; 3E0E0C0C > v_mac_f32_e32 v9, s13, v6 ; 3E120C0D > v_mac_f32_e32 v11, s20, v6 ; 3E160C14 > v_add_f32_e32 v6, s18, v9 ; 060C1212 > v_add_f32_e32 v9, s21, v11 ; 06121615 > v_add_f32_e32 v7, s8, v7 ; 060E0E08 > v_sub_f32_e32 v6, s4, v6 ; 080C0C04 > v_sub_f32_e32 v9, s5, v9 ; 08121205 > v_sub_f32_e32 v7, s6, v7 ; 080E0E06 > v_xor_b32_e32 v6, v6, v3 ; 3A0C0706 > v_xor_b32_e32 v9, v9, v3 ; 3A120709 > v_xor_b32_e32 v3, v7, v3 ; 3A060707 > exp 15, 33, 0, 0, 0, v8, v12, v0, v0 ; F800021F 00000C08 > exp 15, 34, 0, 0, 0, v6, v9, v3, v0 ; F800022F 00030906 > exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 364 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v1, v2, 0, 0, [m0] ; C8040002 > v_interp_p2_f32 v1, [v1], v3, 0, 0, [m0] ; C8050003 > v_interp_p1_f32 v4, v2, 1, 0, [m0] ; C8100102 > v_interp_p2_f32 v4, [v4], v3, 1, 0, [m0] ; C8110103 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 0, 2, [m0] ; C8200802 > s_load_dwordx4 s[32:35], s[2:3], 0x8 ; C0900308 > v_interp_p2_f32 v8, [v8], v3, 0, 2, [m0] ; C8210803 > v_interp_p1_f32 v11, v2, 1, 2, [m0] ; C82C0902 > v_interp_p2_f32 v11, [v11], v3, 1, 2, [m0] ; C82D0903 > v_interp_p1_f32 v12, v2, 2, 2, [m0] ; C8300A02 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > v_cmp_lt_f32_e64 s[0:1], 0, v4 ; D0020000 00020880 > v_rcp_f32_e32 v14, v5 ; 7E1C5505 > s_load_dwordx8 s[24:31], s[4:5], 0x10 ; C0CC0510 > s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C > v_interp_p2_f32 v12, [v12], v3, 2, 2, [m0] ; C8310A03 > v_cndmask_b32_e64 v3, v4, 1.0, s[0:1] ; D2000003 0001E504 > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_bfrev_b32_e32 v0, 14 ; 7E00708E > v_cndmask_b32_e64 v2, v1, 1.0, vcc ; D2000002 01A9E501 > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_bfrev_b32_e32 v10, 15 ; 7E14708F > v_mul_f32_e32 v3, v0, v3 ; 10060700 > v_cmp_le_f32_e32 vcc, 0, v2 ; 7C060480 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > v_cndmask_b32_e64 v3, v10, v3, s[0:1] ; D2000003 
0002070A > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s0, s[32:35], 0x2d ; C200212D > v_cndmask_b32_e32 v2, v10, v2 ; 0004050A > v_mul_f32_e32 v1, v14, v1 ; 1002030E > v_cmp_eq_f32_e32 vcc, 0, v5 ; 7C040A80 > v_mul_f32_e32 v4, v14, v4 ; 1008090E > s_and_b32 s20, s20, s19 ; 87141314 > v_cndmask_b32_e32 v15, v1, v2 ; 001E0501 > v_cndmask_b32_e32 v16, v4, v3 ; 00200704 > s_and_b32 s36, s36, s31 ; 87241F24 > image_sample v1, v[15:16], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A3010F > image_sample v[2:5], v[15:16], s[24:31], s[36:39] dmask:0xf ; F0800F00 0126020F > s_load_dwordx4 s[28:31], s[2:3], 0x4 ; C08E0304 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v9, s0 ; 7E120200 > v_cmp_lt_f32_e64 s[0:1], 0, v6 ; D0020000 00020C80 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cndmask_b32_e64 v5, v6, 1.0, s[0:1] ; D2000005 0001E506 > v_cmp_lt_f32_e64 s[2:3], 0, v7 ; D0020002 00020E80 > v_cndmask_b32_e64 v15, v7, 1.0, s[2:3] ; D200000F 0009E507 > v_cmp_le_f32_e64 s[0:1], 0, v5 ; D0060000 00020A80 > v_mul_f32_e32 v5, v0, v5 ; 100A0B00 > v_cndmask_b32_e64 v5, v10, v5, s[0:1] ; D2000005 00020B0A > v_cmp_le_f32_e64 s[2:3], 0, v15 ; D0060002 00021E80 > v_mul_f32_e32 v15, v0, v15 ; 101E1F00 > s_buffer_load_dword s0, s[28:31], 0x14 ; C2001D14 > v_cndmask_b32_e64 v15, v10, v15, s[2:3] ; D200000F 000A1F0A > s_buffer_load_dword s2, s[28:31], 0x16 ; C2011D16 > v_mul_f32_e32 v7, v14, v7 ; 100E0F0E > v_mul_f32_e32 v6, v14, v6 ; 100C0D0E > v_mul_f32_e32 v14, v8, v8 ; 101C1108 > v_mac_f32_e32 v14, v11, v11 ; 3E1C170B > v_cndmask_b32_e32 v5, v6, v5 ; 000A0B06 > s_buffer_load_dword s1, s[28:31], 0x15 ; C2009D15 > v_mac_f32_e32 v14, v12, v12 ; 3E1C190C > s_buffer_load_dword s3, s[28:31], 0x17 ; C2019D17 > v_mul_f32_e32 v5, v1, v5 ; 100A0B01 > v_cndmask_b32_e32 v7, v7, v15 ; 000E1F07 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v15, s0, v5 ; 101E0A00 > v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E > v_mac_f32_e32 v15, s2, v1 ; 3E1E0202 > s_buffer_load_dword s2, s[28:31], 0x50 ; C2011D50 > v_mul_f32_e32 v7, v1, v7 ; 100E0F01 > s_buffer_load_dword s6, s[28:31], 0x51 ; C2031D51 > v_mac_f32_e32 v15, s1, v7 ; 3E1E0E01 > s_buffer_load_dword s7, s[28:31], 0x52 ; C2039D52 > s_buffer_load_dword s1, s[28:31], 0x40 ; C2009D40 > v_mul_f32_e32 v5, v11, v14 ; 100A1D0B > v_add_f32_e32 v11, s3, v15 ; 06161E03 > s_buffer_load_dword s3, s[28:31], 0x41 ; C2019D41 > v_mul_f32_e32 v6, v8, v14 ; 100C1D08 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v8, s2, v6 ; 10100C02 > v_mul_f32_e32 v7, v12, v14 ; 100E1D0C > v_mac_f32_e32 v8, s6, v5 ; 3E100A06 > s_buffer_load_dword s0, s[28:31], 0x4b ; C2001D4B > v_mov_b32_e32 v12, s1 ; 7E180201 > v_mac_f32_e32 v8, s7, v7 ; 3E100E07 > s_buffer_load_dword s2, s[28:31], 0x3d ; C2011D3D > s_buffer_load_dword s6, s[28:31], 0x4a ; C2031D4A > v_fma_f32 v12, -s3, v8, v12 ; D296000C 24321003 > s_buffer_load_dword s24, s[32:35], 0x2c ; C20C212C > s_buffer_load_dword s23, s[32:35], 0x2e ; C20BA12E > s_buffer_load_dword s15, s[28:31], 0x3b ; C2079D3B > s_buffer_load_dword s25, s[28:31], 0x3c ; C20C9D3C > v_log_f32_e64 v12, |v12| ; D34E010C 0000010C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e32 v11, s0, v11 ; 06161600 > v_sub_f32_e32 v15, s6, v11 ; 081E1606 > v_cmp_eq_f32_e64 s[6:7], 0, s2 ; D0040006 00000480 > s_movk_i32 s1, 0x440 ; B0010440 > v_mov_b32_e32 v14, 0xbfc00000 ; 7E1C02FF BFC00000 > s_and_b64 vcc, exec, s[6:7] ; 87EA067E > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v0, s2 ; 7E005402 > v_mul_f32_e32 v0, v0, v15 ; 10001F00 > s_branch BB0_3 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v15 ; 
7C021E80 > v_cndmask_b32_e64 v15, v15, 1.0, vcc ; D200000F 01A9E50F > v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 > v_mul_f32_e32 v0, v0, v15 ; 10001F00 > v_cndmask_b32_e32 v0, v10, v0 ; 0000010A > v_mov_b32_e32 v10, 0x4e17e580 ; 7E1402FF 4E17E580 > v_add_f32_e32 v15, s0, v9 ; 061E1200 > v_mul_f32_e32 v12, v12, v14 ; 10181D0C > v_add_f32_e32 v14, s15, v10 ; 061C140F > v_add_f32_e32 v9, v10, v15 ; 06121F0A > v_mul_f32_e64 v10, s24, s24 ; D210000A 00003018 > v_mac_f32_e32 v10, v9, v9 ; 3E141309 > v_mul_f32_e32 v9, v5, v9 ; 10121305 > v_mad_f32 v9, -s24, v6, -v9 ; D2820009 A4260C18 > v_mac_f32_e64 v10, s23, s23 ; D23E000A 00002E17 > v_mad_f32 v9, -s23, v7, v9 ; D2820009 24260E17 > v_fma_f32 v10, -v9, v9, v10 ; D296000A 242A1309 > v_fma_f32 v10, v14, v14, -v10 ; D296000A 842A1D0E > v_sqrt_f32_e32 v10, v10 ; 7E14670A > v_add_f32_e32 v10, v9, v10 ; 06141509 > v_max_f32_e32 v9, 0x3c23d70a, v5 ; 20120AFF 3C23D70A > v_mul_f32_e32 v14, v9, v10 ; 101C1509 > s_buffer_load_dword s8, s[32:35], s1 ; C2042001 > s_buffer_load_dword s2, s[28:31], 0x38 ; C2011D38 > s_buffer_load_dword s3, s[28:31], 0x39 ; C2019D39 > s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A > s_buffer_load_dword s20, s[28:31], 0x3e ; C20A1D3E > s_buffer_load_dword s7, s[28:31], 0x3f ; C2039D3F > s_buffer_load_dword s22, s[28:31], 0x42 ; C20B1D42 > s_buffer_load_dword s21, s[28:31], 0x43 ; C20A9D43 > s_buffer_load_dword s13, s[28:31], 0x44 ; C2069D44 > s_buffer_load_dword s16, s[28:31], 0x45 ; C2081D45 > s_buffer_load_dword s17, s[28:31], 0x46 ; C2089D46 > s_buffer_load_dword s19, s[28:31], 0x47 ; C2099D47 > s_buffer_load_dword s26, s[28:31], 0x48 ; C20D1D48 > s_buffer_load_dword s14, s[28:31], 0x49 ; C2071D49 > s_buffer_load_dword s9, s[28:31], 0x4c ; C2049D4C > s_buffer_load_dword s11, s[28:31], 0x4d ; C2059D4D > s_buffer_load_dword s12, s[28:31], 0x4e ; C2061D4E > s_buffer_load_dword s18, s[28:31], 0x4f ; C2091D4F > v_fma_f32 v14, v14, 0.5, v15 ; D296000E 043DE10E > v_mov_b32_e32 v15, 0x4b189680 ; 7E1E02FF 4B189680 > v_cmp_eq_f32_e64 s[0:1], v15, v1 ; D0040000 0002030F > v_cndmask_b32_e64 v11, v11, v14, s[0:1] ; D200000B 00021D0B > v_cmp_eq_f32_e64 s[28:29], 0, s25 ; D004001C 00003280 > v_max_f32_e32 v14, 0, v11 ; 201C1680 > s_and_b64 vcc, exec, s[28:29] ; 87EA1C7E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_5 ; BF870000 > v_mov_b32_e32 v11, 0x6f800000 ; 7E1602FF 6F800000 > v_cmp_gt_f32_e64 vcc, |s25|, v11 ; D008016A 00021619 > v_mov_b32_e32 v11, 0x2f800000 ; 7E1602FF 2F800000 > v_cndmask_b32_e32 v11, 1.0, v11 ; 001616F2 > v_mul_f32_e32 v15, s25, v11 ; 101E1619 > v_rcp_f32_e32 v15, v15 ; 7E1E550F > v_mul_f32_e64 v11, v11, -v15 ; D210000B 40021F0B > v_mul_f32_e32 v15, v11, v14 ; 101E1D0B > s_branch BB0_6 ; BF820000 > v_bfrev_b32_e32 v11, 1 ; 7E167081 > v_cmp_ge_f32_e32 vcc, v11, v14 ; 7C0C1D0B > v_bfrev_b32_e32 v11, 15 ; 7E16708F > v_mul_f32_e32 v15, v11, v14 ; 101E1D0B > v_cndmask_b32_e32 v15, v11, v15 ; 001E1F0B > v_mov_b32_e32 v11, s26 ; 7E16021A > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > v_cndmask_b32_e64 v16, 0, -1, s[0:1] ; D2000010 00018280 > v_cmp_eq_f32_e64 s[0:1], 0, s15 ; D0040000 00001E80 > v_exp_f32_e32 v12, v12 ; 7E184B0C > s_and_b64 vcc, exec, s[0:1] ; 87EA007E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_8 ; BF870000 > v_mov_b32_e32 v17, 0x6f800000 ; 7E2202FF 6F800000 > v_cmp_gt_f32_e64 vcc, |s15|, v17 ; D008016A 0002220F > v_mov_b32_e32 v17, 0x2f800000 ; 7E2202FF 2F800000 > v_cndmask_b32_e32 v17, 1.0, v17 ; 002222F2 > 
v_mul_f32_e32 v18, s15, v17 ; 1024220F > v_rcp_f32_e32 v18, v18 ; 7E245512 > v_mul_f32_e64 v17, v17, -v18 ; D2100011 40022511 > v_mul_f32_e32 v14, v17, v14 ; 101C1D11 > s_branch BB0_9 ; BF820000 > v_bfrev_b32_e32 v17, 1 ; 7E227081 > v_cmp_ge_f32_e32 vcc, v17, v14 ; 7C0C1D11 > v_bfrev_b32_e32 v17, 15 ; 7E22708F > v_mul_f32_e32 v14, v17, v14 ; 101C1D11 > v_cndmask_b32_e32 v14, v17, v14 ; 001C1D11 > v_mul_f32_e32 v12, s22, v12 ; 10181816 > v_cmp_ne_i32_e32 vcc, 0, v16 ; 7D0A2080 > v_min_f32_e32 v17, s20, v12 ; 1E221814 > v_mov_b32_e32 v18, 0x4b189680 ; 7E2402FF 4B189680 > v_mov_b32_e32 v19, 0x3fb8aa3b ; 7E2602FF 3FB8AA3B > v_cndmask_b32_e32 v12, v17, v12 ; 00181911 > v_cndmask_b32_e32 v10, v1, v10 ; 00141501 > v_cmp_eq_f32_e32 vcc, v18, v1 ; 7C040312 > v_mul_f32_e32 v15, v19, v15 ; 101E1F13 > v_mul_f32_e32 v20, s19, v5 ; 10280A13 > v_cndmask_b32_e32 v5, v5, v9 ; 000A1305 > v_mov_b32_e32 v9, 0xbe19999a ; 7E1202FF BE19999A > v_exp_f32_e32 v15, v15 ; 7E1E4B0F > v_add_f32_e32 v5, v5, v9 ; 060A1305 > v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 > v_add_f32_e32 v15, s21, v15 ; 061E1E15 > v_mul_f32_e32 v1, s16, v15 ; 10021E10 > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > v_mul_f32_e32 v1, 0.5, v1 ; 100202F0 > v_mul_f32_e32 v5, v5, v5 ; 100A0B05 > v_mul_f32_e32 v5, v1, v5 ; 100A0B01 > v_mul_f32_e32 v14, v19, v14 ; 101C1D13 > v_min_f32_e32 v5, s7, v5 ; 1E0A0A07 > v_fma_f32 v8, v8, v8, 1.0 ; D2960008 03CA1108 > v_exp_f32_e32 v14, v14 ; 7E1C4B0E > v_mul_f32_e32 v19, s17, v6 ; 10260C11 > v_max_f32_e32 v6, s18, v12 ; 200C1812 > v_max_f32_e32 v5, s13, v5 ; 200A0A0D > v_mul_f32_e32 v21, s17, v7 ; 102A0E11 > v_cndmask_b32_e32 v7, 0, v11 ; 000E1680 > v_fma_f32 v9, s2, v14, v5 ; D2960009 04161C02 > v_fma_f32 v11, s3, v14, v5 ; D296000B 04161C03 > v_mul_f32_e32 v6, v6, v5 ; 100C0B06 > v_mul_f32_e32 v8, 0x3d747645, v8 ; 101010FF 3D747645 > v_mul_f32_e32 v12, s2, v14 ; 10181C02 > v_mul_f32_e32 v15, s3, v14 ; 101E1C03 > v_fma_f32 v5, s6, v14, v5 ; D2960005 04161C06 > v_mul_f32_e32 v14, s6, v14 ; 101C1C06 > v_fma_f32 v12, v12, v8, v6 ; D296000C 041A110C > v_fma_f32 v15, v15, v8, v6 ; D296000F 041A110F > v_fma_f32 v6, v14, v8, v6 ; D2960006 041A110E > s_load_dwordx8 s[28:35], s[4:5], 0x20 ; C0CE0520 > v_cmp_lt_f32_e64 s[2:3], 0, v12 ; D0020002 00021880 > v_cmp_lt_f32_e64 s[4:5], 0, v15 ; D0020004 00021E80 > v_cmp_lt_f32_e64 s[6:7], 0, v6 ; D0020006 00020C80 > v_cndmask_b32_e64 v8, v12, 1.0, s[2:3] ; D2000008 0009E50C > v_cndmask_b32_e64 v14, v15, 1.0, s[4:5] ; D200000E 0011E50F > v_cndmask_b32_e64 v16, v6, 1.0, s[6:7] ; D2000010 0019E506 > v_bfrev_b32_e32 v17, 14 ; 7E22708E > v_cmp_le_f32_e64 s[2:3], 0, v8 ; D0060002 00021080 > v_cmp_le_f32_e64 s[4:5], 0, v14 ; D0060004 00021C80 > v_cmp_le_f32_e64 s[6:7], 0, v16 ; D0060006 00022080 > v_mul_f32_e32 v8, v17, v8 ; 10101111 > v_bfrev_b32_e32 v18, 15 ; 7E24708F > v_mul_f32_e32 v14, v17, v14 ; 101C1D11 > v_mul_f32_e32 v16, v17, v16 ; 10202111 > v_cndmask_b32_e64 v8, v18, v8, s[2:3] ; D2000008 000A1112 > v_rcp_f32_e32 v17, v9 ; 7E225509 > v_cndmask_b32_e64 v14, v18, v14, s[4:5] ; D200000E 00121D12 > v_cndmask_b32_e64 v16, v18, v16, s[6:7] ; D2000010 001A2112 > v_rcp_f32_e32 v18, v11 ; 7E24550B > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s35 ; 87182318 > v_mov_b32_e32 v22, 0 ; 7E2C0280 > image_sample_l v[19:21], v[19:22], s[28:35], s[24:27] dmask:0x7 ; F0900700 00C71313 > v_rcp_f32_e32 v22, v5 ; 7E2C5505 > v_cmp_eq_f32_e32 vcc, 0, v9 ; 7C041280 > v_mul_f32_e32 v12, v17, v12 ; 10181911 > v_cmp_eq_f32_e64 s[0:1], 0, v11 ; D0040000 00021680 > v_mul_f32_e32 v15, 
v18, v15 ; 101E1F12 > v_cndmask_b32_e32 v8, v12, v8 ; 0010110C > v_cndmask_b32_e64 v12, v15, v14, s[0:1] ; D200000C 00021D0F > v_mul_f32_e32 v14, v0, v9 ; 101C1300 > v_mul_f32_e32 v15, v0, v11 ; 101E1700 > v_cmp_eq_f32_e64 s[2:3], 0, v5 ; D0040002 00020A80 > v_mul_f32_e32 v0, v0, v5 ; 10000B00 > v_mul_f32_e32 v9, v10, v9 ; 1012130A > v_mul_f32_e32 v11, v10, v11 ; 1016170A > v_mul_f32_e32 v5, v10, v5 ; 100A0B0A > v_mov_b32_e32 v10, 0xbfb8aa3b ; 7E1402FF BFB8AA3B > v_mul_f32_e32 v0, v10, v0 ; 1000010A > v_mul_f32_e32 v14, v10, v14 ; 101C1D0A > v_mul_f32_e32 v15, v10, v15 ; 101E1F0A > v_fma_f32 v1, -v1, s14, 1.0 ; D2960001 23C81D01 > v_mul_f32_e32 v6, v22, v6 ; 100C0D16 > v_mul_f32_e32 v9, v10, v9 ; 1012130A > v_mul_f32_e32 v11, v10, v11 ; 1016170A > v_mul_f32_e32 v5, v10, v5 ; 100A0B0A > v_exp_f32_e32 v10, v14 ; 7E144B0E > v_exp_f32_e32 v14, v15 ; 7E1C4B0F > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_mul_f32_e32 v10, s9, v10 ; 10141409 > v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 > v_cndmask_b32_e64 v6, v6, v16, s[2:3] ; D2000006 000A2106 > v_mul_f32_e32 v0, s12, v0 ; 1000000C > v_exp_f32_e32 v9, v9 ; 7E124B09 > v_exp_f32_e32 v11, v11 ; 7E164B0B > v_exp_f32_e32 v5, v5 ; 7E0A4B05 > v_mul_f32_e32 v14, s11, v14 ; 101C1C0B > v_mul_f32_e32 v0, v6, v0 ; 10000106 > v_fma_f32 v6, v7, v1, v9 ; D2960006 04260307 > v_mul_f32_e32 v8, v8, v10 ; 10101508 > v_mul_f32_e32 v10, v12, v14 ; 10141D0C > v_fma_f32 v12, v7, v1, v11 ; D296000C 042E0307 > v_fma_f32 v1, v7, v1, v5 ; D2960001 04160307 > v_add_f32_e64 v7, 0, v12 clamp ; D2060807 00021880 > v_add_f32_e64 v12, 0, v1 clamp ; D206080C 00020280 > v_mad_f32 v1, -v9, v8, v8 ; D2820001 24221109 > v_mad_f32 v8, -v11, v10, v10 ; D2820008 242A150B > v_mad_f32 v0, -v5, v0, v0 ; D2820000 24020105 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, v1, v19 ; 100A2701 > v_mul_f32_e32 v9, v8, v20 ; 10122908 > v_mul_f32_e32 v10, v0, v21 ; 10142B00 > v_fma_f32 v1, v5, s8, v1 ; D2960001 04041105 > v_fma_f32 v5, v9, s8, v8 ; D2960005 04201109 > v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 > v_fma_f32 v8, v10, s8, v0 ; D2960008 0400110A > v_fma_f32 v0, v2, v6, v1 ; D2960000 04060D02 > v_fma_f32 v1, v3, v7, v5 ; D2960001 04160F03 > v_fma_f32 v2, v4, v12, v8 ; D2960002 04221904 > v_mov_b32_e32 v3, 0 ; 7E060280 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 24 >Code Size: 1668 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_rcp_f32_e32 v5, s4 ; 7E0A5404 > v_rcp_f32_e32 v6, s0 ; 7E0C5400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_cndmask_b32_e32 v5, v5, v1 ; 000A0305 > v_cndmask_b32_e64 v1, v6, v1, s[0:1] ; D2000001 00020306 > v_fma_f32 v6, v3, 0.5, 0.5 ; D2960006 
03C1E103 > v_fma_f32 v7, v4, -0.5, 0.5 ; D2960007 03C1E304 > v_subrev_f32_e32 v8, v5, v6 ; 0A100D05 > v_fma_f32 v10, v5, 0, v6 ; D296000A 04190105 > v_subrev_f32_e32 v9, v1, v7 ; 0A120F01 > v_fma_f32 v11, v1, 0, v7 ; D296000B 041D0101 > v_add_f32_e32 v5, v6, v5 ; 060A0B06 > exp 15, 32, 0, 0, 0, v8, v9, v10, v9 ; F800020F 090A0908 > exp 15, 33, 0, 0, 0, v5, v9, v8, v11 ; F800021F 0B080905 > v_add_f32_e32 v1, v7, v1 ; 06020307 > exp 15, 34, 0, 0, 0, v6, v7, v5, v7 ; F800022F 07050706 > exp 15, 35, 0, 0, 0, v8, v1, v10, v1 ; F800023F 010A0108 > exp 15, 36, 0, 0, 0, v5, v1, v0, v0 ; F800024F 00000105 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 12 >Code Size: 192 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 2, 1, [m0] ; C8200602 > v_interp_p2_f32 v8, [v8], v3, 2, 1, [m0] ; C8210603 > v_interp_p1_f32 v9, v2, 3, 1, [m0] ; C8240702 > v_interp_p2_f32 v9, [v9], v3, 3, 1, [m0] ; C8250703 > v_interp_p1_f32 v10, v2, 0, 2, [m0] ; C8280802 > v_interp_p2_f32 v10, [v10], v3, 0, 2, [m0] ; C8290803 > v_interp_p1_f32 v11, v2, 1, 2, [m0] ; C82C0902 > v_interp_p2_f32 v11, [v11], v3, 1, 2, [m0] ; C82D0903 > v_interp_p1_f32 v14, v2, 2, 2, [m0] ; C8380A02 > v_interp_p2_f32 v14, [v14], v3, 2, 2, [m0] ; C8390A03 > v_interp_p1_f32 v15, v2, 3, 2, [m0] ; C83C0B02 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > v_interp_p2_f32 v15, [v15], v3, 3, 2, [m0] ; C83D0B03 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s4, s4, s19 ; 87041304 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > image_sample v[10:12], v[10:11], s[12:19], s[4:7] dmask:0x7 ; F0800700 00230A0A > v_mov_b32_e32 v2, 0x3e99999a ; 7E0402FF 3E99999A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v2, v10 ; 10061502 > v_mov_b32_e32 v10, 0x3f170a3d ; 7E1402FF 3F170A3D > v_mac_f32_e32 v3, v10, v11 ; 3E06170A > v_mov_b32_e32 v11, 0x3de147ae ; 7E1602FF 3DE147AE > image_sample v[22:24], v[14:15], s[12:19], s[4:7] dmask:0x7 ; F0800700 0023160E > v_mac_f32_e32 v3, 
v11, v12 ; 3E06190B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v12, v2, v22 ; 10182D02 > v_mac_f32_e32 v12, v10, v23 ; 3E182F0A > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mac_f32_e32 v12, v11, v24 ; 3E18310B > image_sample v[22:24], v[8:9], s[12:19], s[4:7] dmask:0x7 ; F0800700 00231608 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, v2, v22 ; 10102D02 > v_mac_f32_e32 v8, v10, v23 ; 3E102F0A > v_mac_f32_e32 v8, v11, v24 ; 3E10310B > v_min_legacy_f32_e32 v14, v3, v12 ; 1A1C1903 > v_cmp_ge_f32_e32 vcc, v8, v12 ; 7C0C1908 > v_cndmask_b32_e32 v9, v14, v8 ; 0012110E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s2, s[0:3], 0x16 ; C2010116 > v_max_legacy_f32_e32 v14, v3, v12 ; 1C1C1903 > v_cmp_lt_f32_e64 s[0:1], v8, v12 ; D0020000 00021908 > image_sample v[22:24], v[18:19], s[12:19], s[4:7] dmask:0x7 ; F0800700 00231612 > v_cmp_lt_f32_e32 vcc, v8, v3 ; 7C020708 > v_cndmask_b32_e64 v3, v14, v8, s[0:1] ; D2000003 0002110E > image_sample v[18:20], v[20:21], s[12:19], s[4:7] dmask:0x7 ; F0800700 00231214 > image_sample v[14:16], v[16:17], s[12:19], s[4:7] dmask:0x7 ; F0800700 00230E10 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v8, v2, v22 ; 10102D02 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v14, v2, v14 ; 101C1D02 > v_cndmask_b32_e32 v3, v3, v9 ; 00061303 > v_mul_f32_e32 v9, v2, v18 ; 10122502 > v_mac_f32_e32 v8, v10, v23 ; 3E102F0A > v_mac_f32_e32 v9, v10, v19 ; 3E12270A > v_mac_f32_e32 v14, v10, v15 ; 3E1C1F0A > v_mac_f32_e32 v8, v11, v24 ; 3E10310B > v_mac_f32_e32 v9, v11, v20 ; 3E12290B > v_mac_f32_e32 v14, v11, v16 ; 3E1C210B > v_min_legacy_f32_e32 v12, v8, v9 ; 1A181308 > v_cmp_ge_f32_e32 vcc, v14, v9 ; 7C0C130E > v_max_legacy_f32_e32 v15, v8, v9 ; 1C1E1308 > v_cmp_lt_f32_e64 s[0:1], v14, v9 ; D0020000 0002130E > v_cndmask_b32_e32 v12, v12, v14 ; 00181D0C > v_cmp_lt_f32_e32 vcc, v14, v8 ; 7C02110E > v_cndmask_b32_e64 v8, v15, v14, s[0:1] ; D2000008 00021D0F > image_sample v[14:16], v[4:5], s[12:19], s[4:7] dmask:0x7 ; F0800700 00230E04 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v4, v2, v14 ; 10081D02 > v_mac_f32_e32 v4, v10, v15 ; 3E081F0A > image_sample v[5:7], v[6:7], s[12:19], s[4:7] dmask:0x7 ; F0800700 00230506 > v_mac_f32_e32 v4, v11, v16 ; 3E08210B > image_sample v[14:16], v[0:1], s[12:19], s[4:7] dmask:0x7 ; F0800700 00230E00 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v5, v2, v5 ; 100A0B02 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v2, v14 ; 10001D02 > v_mac_f32_e32 v5, v10, v6 ; 3E0A0D0A > v_mac_f32_e32 v0, v10, v15 ; 3E001F0A > v_mac_f32_e32 v5, v11, v7 ; 3E0A0F0B > v_mac_f32_e32 v0, v11, v16 ; 3E00210B > v_cndmask_b32_e32 v8, v8, v12 ; 00101908 > v_min_legacy_f32_e32 v6, v4, v5 ; 1A0C0B04 > v_cmp_ge_f32_e32 vcc, v0, v5 ; 7C0C0B00 > v_max_legacy_f32_e32 v2, v4, v5 ; 1C040B04 > v_cmp_lt_f32_e64 s[0:1], v0, v5 ; D0020000 00020B00 > v_cndmask_b32_e32 v1, v6, v0 ; 00020106 > v_cmp_lt_f32_e32 vcc, v0, v4 ; 7C020900 > v_cndmask_b32_e64 v0, v2, v0, s[0:1] ; D2000000 00020102 > v_cndmask_b32_e32 v0, v0, v1 ; 00000300 > v_min_legacy_f32_e32 v9, v3, v8 ; 1A121103 > v_cmp_ge_f32_e32 vcc, v0, v8 ; 7C0C1100 > v_max_legacy_f32_e32 v2, v3, v8 ; 1C041103 > v_cmp_lt_f32_e64 s[0:1], v0, v8 ; D0020000 00021100 > v_cndmask_b32_e32 v1, v9, v0 ; 00020109 > v_cmp_lt_f32_e32 vcc, v0, v3 ; 7C020700 > v_cndmask_b32_e64 v0, v2, v0, s[0:1] ; D2000000 00020102 > v_cndmask_b32_e32 v0, v0, v1 ; 00000300 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_min_f32_e32 v0, s2, v0 ; 1E000002 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 > v_mov_b32_e32 
v3, v0 ; 7E060300 >Shader epilog disassembly: > exp 1, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001801 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 28 >Code Size: 608 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_rcp_f32_e32 v5, s4 ; 7E0A5404 > v_rcp_f32_e32 v6, s0 ; 7E0C5400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_cndmask_b32_e32 v5, v5, v1 ; 000A0305 > v_cndmask_b32_e64 v1, v6, v1, s[0:1] ; D2000001 00020306 > v_fma_f32 v6, v3, 0.5, 0.5 ; D2960006 03C1E103 > v_fma_f32 v7, v4, -0.5, 0.5 ; D2960007 03C1E304 > v_subrev_f32_e32 v8, v5, v6 ; 0A100D05 > v_fma_f32 v10, v5, 0, v6 ; D296000A 04190105 > v_subrev_f32_e32 v9, v1, v7 ; 0A120F01 > v_fma_f32 v11, v1, 0, v7 ; D296000B 041D0101 > v_add_f32_e32 v5, v6, v5 ; 060A0B06 > exp 15, 32, 0, 0, 0, v8, v9, v10, v9 ; F800020F 090A0908 > exp 15, 33, 0, 0, 0, v5, v9, v8, v11 ; F800021F 0B080905 > v_add_f32_e32 v1, v7, v1 ; 06020307 > exp 15, 34, 0, 0, 0, v6, v7, v5, v7 ; F800022F 07050706 > exp 15, 35, 0, 0, 0, v8, v1, v10, v1 ; F800023F 010A0108 > exp 15, 36, 0, 0, 0, v5, v1, v0, v0 ; F800024F 00000105 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 12 >Code Size: 192 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 2, 1, [m0] ; C8200602 > v_interp_p2_f32 v8, [v8], v3, 2, 1, [m0] ; C8210603 > v_interp_p1_f32 v9, v2, 3, 1, [m0] ; C8240702 > v_interp_p2_f32 v9, [v9], v3, 3, 1, [m0] ; C8250703 > v_interp_p1_f32 v10, v2, 0, 2, [m0] ; C8280802 > v_interp_p2_f32 v10, [v10], v3, 0, 2, [m0] ; C8290803 > v_interp_p1_f32 v11, v2, 1, 2, [m0] ; C82C0902 > v_interp_p2_f32 v11, [v11], v3, 1, 2, [m0] ; C82D0903 > v_interp_p1_f32 v14, v2, 2, 2, [m0] ; C8380A02 > v_interp_p2_f32 v14, [v14], v3, 2, 2, [m0] ; C8390A03 > v_interp_p1_f32 v15, v2, 3, 2, [m0] ; C83C0B02 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > 
v_interp_p2_f32 v15, [v15], v3, 3, 2, [m0] ; C83D0B03 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s4, s4, s19 ; 87041304 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > image_sample v2, v[10:11], s[12:19], s[4:7] dmask:0x1 ; F0800100 0023020A > image_sample v3, v[14:15], s[12:19], s[4:7] dmask:0x1 ; F0800100 0023030E > image_sample v8, v[8:9], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230808 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_min_f32_e32 v10, v3, v2 ; 1E140503 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_ge_f32_e32 vcc, v8, v3 ; 7C0C0708 > v_cndmask_b32_e32 v9, v10, v8 ; 0012110A > v_max_f32_e32 v10, v2, v3 ; 20140702 > v_cmp_lt_f32_e64 s[0:1], v8, v3 ; D0020000 00020708 > v_cmp_lt_f32_e32 vcc, v8, v2 ; 7C020508 > v_cndmask_b32_e64 v2, v10, v8, s[0:1] ; D2000002 0002110A > image_sample v3, v[18:19], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230312 > image_sample v8, v[20:21], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230814 > image_sample v10, v[16:17], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230A10 > v_cndmask_b32_e32 v2, v2, v9 ; 00041302 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_min_f32_e32 v9, v8, v3 ; 1E120708 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_ge_f32_e32 vcc, v10, v8 ; 7C0C110A > v_max_f32_e32 v11, v3, v8 ; 20161103 > v_cmp_lt_f32_e64 s[0:1], v10, v8 ; D0020000 0002110A > image_sample v4, v[4:5], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230404 > image_sample v5, v[6:7], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230506 > v_cndmask_b32_e32 v9, v9, v10 ; 00121509 > v_cmp_lt_f32_e32 vcc, v10, v3 ; 7C02070A > v_cndmask_b32_e64 v3, v11, v10, s[0:1] ; D2000003 0002150B > image_sample v0, v[0:1], s[12:19], s[4:7] dmask:0x1 ; F0800100 00230000 > v_cndmask_b32_e32 v3, v3, v9 ; 00061303 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_min_f32_e32 v6, v5, v4 ; 1E0C0905 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_ge_f32_e32 vcc, v0, v5 ; 7C0C0B00 > v_cndmask_b32_e32 v1, v6, v0 ; 00020106 > v_max_f32_e32 v6, v5, v4 ; 200C0905 > v_cmp_lt_f32_e64 s[0:1], v0, v5 ; D0020000 00020B00 > v_cmp_lt_f32_e32 vcc, v0, v4 ; 7C020900 > v_cndmask_b32_e64 v0, v6, v0, s[0:1] ; D2000000 00020106 > v_cndmask_b32_e32 v0, v0, v1 ; 00000300 > v_min_f32_e32 v8, v3, v2 ; 1E100503 > v_cmp_ge_f32_e32 vcc, v0, v3 ; 7C0C0700 > v_max_f32_e32 v4, v2, v3 ; 20080702 > v_cmp_lt_f32_e64 s[0:1], v0, v3 ; D0020000 00020700 > v_cndmask_b32_e32 v1, v8, v0 ; 00020108 > v_cmp_lt_f32_e32 vcc, v0, v2 ; 7C020500 > v_cndmask_b32_e64 v0, v4, v0, s[0:1] ; D2000000 00020104 > v_cndmask_b32_e32 v0, v0, v1 ; 00000300 > v_mov_b32_e32 v1, v0 ; 7E020300 > v_mov_b32_e32 v2, v0 ; 7E040300 > v_mov_b32_e32 v3, v0 ; 7E060300 >Shader epilog disassembly: > exp 1, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001801 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 24 >Code Size: 448 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > prolog.color_two_side = 0 > 
prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 >DCL OUT[0], COLOR >DCL CONST[0..3] > 0: MOV OUT[0], CONST[3] > 1: END >radeonsi: Compiling shader 389 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) > %29 = bitcast float %5 to i32 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %29, 10 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 11 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 12 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %27, 13 > %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float %28, 14 > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %34, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL OUT[0], POSITION >DCL CONST[0..4] >DCL TEMP[0] > 0: MUL TEMP[0], CONST[0].xxxx, CONST[1] > 1: MAD TEMP[0], CONST[0].yyyy, CONST[2], TEMP[0] > 2: MAD TEMP[0], CONST[0].zzzz, CONST[3], TEMP[0] > 3: MAD OUT[0], CONST[0].wwww, CONST[4], TEMP[0] > 4: END >radeonsi: Compiling shader 390 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' 
>source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 > %15 = call float @llvm.SI.load.const(<16 x i8> %14, i32 0) > %16 = call float @llvm.SI.load.const(<16 x i8> %14, i32 4) > %17 = call float @llvm.SI.load.const(<16 x i8> %14, i32 8) > %18 = call float @llvm.SI.load.const(<16 x i8> %14, i32 12) > %19 = call float @llvm.SI.load.const(<16 x i8> %14, i32 16) > %20 = call float @llvm.SI.load.const(<16 x i8> %14, i32 20) > %21 = call float @llvm.SI.load.const(<16 x i8> %14, i32 24) > %22 = call float @llvm.SI.load.const(<16 x i8> %14, i32 28) > %23 = call float @llvm.SI.load.const(<16 x i8> %14, i32 32) > %24 = call float @llvm.SI.load.const(<16 x i8> %14, i32 36) > %25 = call float @llvm.SI.load.const(<16 x i8> %14, i32 40) > %26 = call float @llvm.SI.load.const(<16 x i8> %14, i32 44) > %27 = call float @llvm.SI.load.const(<16 x i8> %14, i32 48) > %28 = call float @llvm.SI.load.const(<16 x i8> %14, i32 52) > %29 = call float @llvm.SI.load.const(<16 x i8> %14, i32 56) > %30 = call float @llvm.SI.load.const(<16 x i8> %14, i32 60) > %31 = call float @llvm.SI.load.const(<16 x i8> %14, i32 64) > %32 = call float @llvm.SI.load.const(<16 x i8> %14, i32 68) > %33 = call float @llvm.SI.load.const(<16 x i8> %14, i32 72) > %34 = call float @llvm.SI.load.const(<16 x i8> %14, i32 76) > %35 = fmul float %15, %19 > %36 = fmul float %15, %20 > %37 = fmul float %15, %21 > %38 = fmul float %15, %22 > %39 = fmul float %16, %23 > %40 = fadd float %39, %35 > %41 = fmul float %16, %24 > %42 = fadd float %41, %36 > %43 = fmul float %16, %25 > %44 = fadd float %43, %37 > %45 = fmul float %16, %26 > %46 = fadd float %45, %38 > %47 = fmul float %17, %27 > %48 = fadd float %47, %40 > %49 = fmul float %17, %28 > %50 = fadd float %49, %42 > %51 = fmul float %17, %29 > %52 = fadd float %51, %44 > %53 = fmul float %17, %30 > %54 = fadd float %53, %46 > %55 = fmul float %18, %31 > %56 = fadd float %55, %48 > %57 = fmul float %18, %32 > %58 = fadd float %57, %50 > %59 = fmul float %18, %33 > %60 = fadd float %59, %52 > %61 = fmul float %18, %34 > %62 = fadd float %61, %54 > %63 = bitcast i32 %11 to float > %64 = insertvalue <{ float, float, float }> undef, float %63, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %56, float %58, float %60, float %62) > ret <{ float, float, float }> %64 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x60 ; C2020160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > 
s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v6, 0 ; 7E0C0280 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v1, v3, s4, s4 ; D2960001 00100903 > v_fma_f32 v5, v4, -s0, s0 ; D2960005 40000104 > exp 15, 32, 0, 0, 0, v1, v5, v6, v0 ; F800020F 00060501 > exp 15, 12, 0, 1, 0, v3, v4, v6, v0 ; F80008CF 00060403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 88 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > s_load_dwordx4 s[16:19], s[2:3], 0x8 ; C0880308 > s_load_dwordx2 s[0:1], s[4:5], 0x4 ; C0400504 > s_load_dwordx2 s[2:3], s[4:5], 0x6 ; C0410506 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v2, v2, 3, 0, [m0] ; C8080302 > v_mov_b32_e32 v4, 0 ; 7E080280 > v_interp_p2_f32 v2, [v2], v3, 3, 0, [m0] ; C8090303 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > v_cmp_lt_f32_e64 s[0:1], 0, v1 ; D0020000 00020280 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cndmask_b32_e64 v6, v1, 1.0, s[0:1] ; D2000006 0001E501 > v_rcp_f32_e32 v4, v2 ; 7E085502 > s_load_dwordx8 s[20:27], s[4:5], 0x10 ; C0CA0510 > s_load_dwordx4 s[0:3], s[4:5], 0x1c ; C080051C > v_cndmask_b32_e64 v5, v0, 1.0, vcc ; D2000005 01A9E500 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_cmp_le_f32_e32 vcc, 0, v5 ; 7C060A80 > v_mul_f32_e32 v5, v7, v5 ; 100A0B07 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_cndmask_b32_e32 v5, v8, v5 ; 000A0B08 > v_cmp_le_f32_e32 vcc, 0, v6 ; 7C060C80 > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > v_cndmask_b32_e32 v6, v8, v6 ; 000C0D08 > v_cmp_eq_f32_e32 vcc, 0, v2 ; 7C040480 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_cndmask_b32_e32 v4, v0, v5 ; 00080B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s27 ; 87001B00 > v_cndmask_b32_e32 v5, v1, v6 ; 000A0D01 > v_mov_b32_e32 v10, 0x3fd9999a ; 7E1402FF 3FD9999A > v_mov_b32_e32 v11, 0x3d75c28f ; 7E1602FF 3D75C28F > image_sample v[0:2], v[4:5], s[20:27], s[0:3] dmask:0x7 ; F0800700 00050004 > s_buffer_load_dword s0, s[12:15], 0x6c ; C2000D6C > s_buffer_load_dword s1, s[12:15], 0x6d ; C2008D6D > s_buffer_load_dword s2, s[12:15], 0x6e ; C2010D6E > s_buffer_load_dword s3, s[16:19], 0x58 ; C2019158 > v_mov_b32_e32 v4, 0xbb83126f ; 7E0802FF BB83126F > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v6, v0, v3, v4 ; D2960006 04120700 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v0, s0, v0 ; 10000000 > v_mac_f32_e32 v0, s1, v1 ; 3E000201 > v_fma_f32 v9, v1, v3, v4 ; D2960009 04120701 > v_fma_f32 v4, v2, v3, v4 ; D2960004 04120702 > v_mac_f32_e32 v0, s2, v2 ; 3E000402 > v_mul_f32_e32 v3, s3, v0 ; 10060003 > v_mov_b32_e32 v5, 0x40c66666 ; 7E0A02FF 40C66666 > v_max_f32_e32 v1, 0, v9 ; 20021280 > v_max_f32_e32 v0, 0, v6 ; 20000C80 > v_max_f32_e32 v2, 0, v4 ; 20040880 > v_fma_f32 v4, v0, v5, 0.5 ; D2960004 03C20B00 > v_fma_f32 v6, v1, v5, 0.5 ; D2960006 03C20B01 > v_fma_f32 v9, v0, v5, v10 ; D2960009 042A0B00 > v_fma_f32 v12, v1, v5, v10 ; D296000C 042A0B01 > v_fma_f32 v10, v2, v5, v10 ; D296000A 042A0B02 > v_fma_f32 v5, v2, v5, 0.5 ; D2960005 03C20B02 > v_mul_f32_e32 v4, v4, v0 ; 10080104 > v_mul_f32_e32 
v6, v6, v1 ; 100C0306 > v_mul_f32_e32 v5, v5, v2 ; 100A0505 > v_fma_f32 v0, v0, v9, v11 ; D2960000 042E1300 > v_fma_f32 v1, v1, v12, v11 ; D2960001 042E1901 > v_fma_f32 v2, v2, v10, v11 ; D2960002 042E1502 > v_cmp_eq_f32_e32 vcc, 0, v0 ; 7C040080 > v_rcp_f32_e32 v0, v0 ; 7E005500 > v_cmp_eq_f32_e64 s[0:1], 0, v1 ; D0040000 00020280 > v_rcp_f32_e32 v1, v1 ; 7E025501 > v_cmp_eq_f32_e64 s[2:3], 0, v2 ; D0040002 00020480 > v_rcp_f32_e32 v2, v2 ; 7E045502 > v_cmp_lt_f32_e64 s[4:5], 0, v4 ; D0020004 00020880 > v_cmp_lt_f32_e64 s[6:7], 0, v6 ; D0020006 00020C80 > v_cmp_lt_f32_e64 s[8:9], 0, v5 ; D0020008 00020A80 > v_cndmask_b32_e64 v9, v4, 1.0, s[4:5] ; D2000009 0011E504 > v_cndmask_b32_e64 v10, v6, 1.0, s[6:7] ; D200000A 0019E506 > v_cndmask_b32_e64 v11, v5, 1.0, s[8:9] ; D200000B 0021E505 > v_cmp_le_f32_e64 s[4:5], 0, v9 ; D0060004 00021280 > v_mul_f32_e32 v9, v7, v9 ; 10121307 > v_cmp_le_f32_e64 s[6:7], 0, v10 ; D0060006 00021480 > v_mul_f32_e32 v10, v7, v10 ; 10141507 > v_cmp_le_f32_e64 s[8:9], 0, v11 ; D0060008 00021680 > v_mul_f32_e32 v7, v7, v11 ; 100E1707 > v_cndmask_b32_e64 v9, v8, v9, s[4:5] ; D2000009 00121308 > v_mul_f32_e32 v0, v0, v4 ; 10000900 > v_cndmask_b32_e64 v10, v8, v10, s[6:7] ; D200000A 001A1508 > v_mul_f32_e32 v1, v1, v6 ; 10020D01 > v_cndmask_b32_e64 v7, v8, v7, s[8:9] ; D2000007 00220F08 > v_mul_f32_e32 v2, v2, v5 ; 10040B02 > v_cndmask_b32_e32 v0, v0, v9 ; 00001300 > v_cndmask_b32_e64 v1, v1, v10, s[0:1] ; D2000001 00021501 > v_cndmask_b32_e64 v2, v2, v7, s[2:3] ; D2000002 000A0F02 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 16 >Code Size: 560 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 392 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, 
float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %44, float %46, 20 > %48 = add i32 %15, %12 > %49 = bitcast i32 %48 to float > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %47, float %49, 21 > %51 = add i32 %15, %12 > %52 = bitcast i32 %51 to float > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float }> %50, float %52, 22 > %54 = add i32 %15, %12 > %55 = bitcast i32 %54 to float > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %53, float %55, 23 > %57 = add i32 %15, %12 > %58 = bitcast i32 %57 to float > %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %56, float %58, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float }> %59 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 0x3d981627 ; 7E0002FF 3D981627 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[12:15], v4, s[4:7], 0 idxen ; E00C2000 80010C04 > s_load_dwordx4 s[4:7], s[10:11], 0xc ; C0820B0C > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[18:21], v6, s[16:19], 0 idxen ; E00C2000 80041206 > s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C0860B10 > s_buffer_load_dword s9, s[0:3], 0x61 ; C2048161 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[4:7], v7, s[4:7], 0 idxen ; E00C2000 80010407 > s_load_dwordx4 s[4:7], s[10:11], 0x14 ; C0820B14 > s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160 > s_waitcnt vmcnt(1) ; BF8C0F71 > buffer_load_format_xyzw v[20:23], v8, s[12:15], 0 idxen ; E00C2000 80031408 > s_buffer_load_dword s13, s[0:3], 0x65 ; C2068165 > v_mov_b32_e32 v1, 0xbc996e30 ; 7E0202FF BC996E30 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > buffer_load_format_xyzw v[23:26], v9, s[4:7], 0 idxen ; E00C2000 80011709 > s_buffer_load_dword s5, s[0:3], 0x5d ; C202815D > s_buffer_load_dword s4, s[0:3], 0x5c ; C202015C > s_buffer_load_dword s6, s[0:3], 0x5e ; C203015E > s_buffer_load_dword s12, s[0:3], 0x64 ; C2060164 > v_mov_b32_e32 v3, 0xbe593484 ; 7E0602FF BE593484 > v_mov_b32_e32 v10, 0x3fc90da4 ; 7E1402FF 3FC90DA4 > v_mov_b32_e32 v11, 0x40490fdb ; 7E1602FF 40490FDB > s_buffer_load_dword s10, s[0:3], 0x62 ; C2050162 > s_buffer_load_dword s7, s[0:3], 0x5f ; C203815F > s_buffer_load_dword s11, s[0:3], 0x63 ; C2058163 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v7, s5, v13 ; 100E1A05 > s_buffer_load_dword s5, s[0:3], 0x69 ; C2028169 > v_mac_f32_e32 v7, s4, v12 ; 3E0E1804 > v_mul_f32_e32 v8, s9, v13 ; 10101A09 > exp 15, 32, 0, 0, 0, v18, v19, v18, v19 ; F800020F 13121312 > s_buffer_load_dword s4, s[0:3], 0x68 ; C2020168 > s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F > v_mul_f32_e32 v18, s5, v13 ; 10241A05 > s_buffer_load_dword s5, s[0:3], 0x94 ; C2028194 > v_mac_f32_e32 v8, s8, v12 ; 3E101808 > v_fma_f32 v0, |v5|, v1, v0 ; D2960100 04020305 > s_buffer_load_dword s8, s[0:3], 0x6a ; C204016A > s_buffer_load_dword s9, s[0:3], 0x66 ; C2048166 > v_sub_f32_e64 v1, 1.0, |v5| ; D2080201 00020AF2 > v_fma_f32 v0, v0, |v5|, v3 ; D2960200 040E0B00 > v_sqrt_f32_e32 v1, v1 ; 7E026701 > v_fma_f32 v0, v0, 
|v5|, v10 ; D2960200 042A0B00 > v_mul_f32_e32 v9, s13, v13 ; 10121A0D > v_mul_f32_e32 v3, v0, v1 ; 10060300 > v_mac_f32_e32 v18, s4, v12 ; 3E241804 > v_mac_f32_e32 v9, s12, v12 ; 3E12180C > v_mac_f32_e32 v7, s6, v14 ; 3E0E1C06 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_sub_f32_e32 v12, s5, v12 ; 08181805 > s_buffer_load_dword s6, s[0:3], 0x95 ; C2030195 > s_buffer_load_dword s5, s[0:3], 0x6b ; C202816B > v_fma_f32 v3, -2.0, v3, v11 ; D2960003 042E06F5 > v_cmp_lt_f32_e64 vcc, v5, -v5 ; D002006A 40020B05 > v_mac_f32_e32 v18, s8, v14 ; 3E241C08 > v_mac_f32_e32 v9, s9, v14 ; 3E121C09 > s_buffer_load_dword s4, s[0:3], 0x67 ; C2020167 > v_cndmask_b32_e32 v3, 0, v3 ; 00060680 > s_buffer_load_dword s9, s[0:3], 0x96 ; C2048196 > s_buffer_load_dword s8, s[0:3], 0x98 ; C2040198 > s_buffer_load_dword s0, s[0:3], 0x99 ; C2000199 > v_fma_f32 v0, v0, v1, v3 ; D2960000 040E0300 > v_mul_f32_e32 v0, 0x3e22f983, v0 ; 100000FF 3E22F983 > v_fract_f32_e32 v0, v0 ; 7E004100 > v_mac_f32_e32 v8, s10, v14 ; 3E101C0A > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e32 v10, s5, v18 ; 06142405 > v_sub_f32_e32 v13, s6, v13 ; 081A1A06 > v_mul_f32_e32 v15, v12, v15 ; 101E1F0C > v_sin_f32_e32 v0, v0 ; 7E006B00 > v_mac_f32_e32 v15, v13, v16 ; 3E1E210D > v_add_f32_e32 v7, s7, v7 ; 060E0E07 > v_mul_f32_e32 v11, s8, v10 ; 10161408 > v_add_f32_e32 v8, s11, v8 ; 0610100B > v_mul_f32_e32 v16, s0, v10 ; 10201400 > v_add_f32_e32 v9, s4, v9 ; 06121204 > v_fma_f32 v11, v7, s8, v11 ; D296000B 042C1107 > v_fma_f32 v16, v8, -s0, v16 ; D2960010 44400108 > v_mul_f32_e32 v1, v12, v4 ; 1002090C > v_mul_f32_e32 v19, v12, v20 ; 1026290C > exp 15, 33, 0, 0, 0, v11, v16, v9, v10 ; F800021F 0A09100B > v_sub_f32_e32 v14, s9, v14 ; 081C1C09 > v_mac_f32_e32 v19, v13, v21 ; 3E262B0D > v_mac_f32_e32 v1, v13, v5 ; 3E020B0D > v_mul_f32_e32 v3, 0.5, v5 ; 10060AF0 > v_mul_f32_e32 v5, 0.5, v0 ; 100A00F0 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 34, 0, 0, 0, v23, v24, v25, v26 ; F800022F 1A191817 > v_mov_b32_e32 v4, 1.0 ; 7E0802F2 > v_mul_f32_e32 v0, -0.5, v0 ; 100000F1 > v_mac_f32_e32 v19, v14, v22 ; 3E262D0E > v_mac_f32_e32 v15, v14, v17 ; 3E1E230E > v_mac_f32_e32 v1, v14, v6 ; 3E020D0E > exp 15, 35, 0, 0, 0, v3, v5, v0, v4 ; F800023F 04000503 > exp 15, 36, 0, 0, 0, v1, v19, v15, v0 ; F800024F 000F1301 > exp 15, 12, 0, 1, 0, v7, v8, v9, v10 ; F80008CF 0A090807 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 28 >Code Size: 560 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v5, v2, 0, 0, [m0] ; C8140002 > v_interp_p2_f32 v5, [v5], v3, 0, 0, [m0] ; C8150003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > v_interp_p1_f32 v7, v2, 2, 0, [m0] ; C81C0202 > v_interp_p2_f32 v7, [v7], v3, 2, 0, [m0] ; C81D0203 > v_interp_p1_f32 v8, v2, 3, 0, [m0] ; C8200302 > v_interp_p2_f32 v8, [v8], v3, 3, 0, [m0] ; C8210303 > v_interp_p1_f32 v9, v2, 0, 1, [m0] ; C8240402 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v9, [v9], v3, 0, 1, [m0] ; C8250403 > v_interp_p1_f32 v10, v2, 1, 1, [m0] ; C8280502 > v_interp_p2_f32 v10, [v10], v3, 1, 1, [m0] ; C8290503 > v_interp_p1_f32 v11, v2, 3, 1, [m0] ; C82C0702 > v_interp_p2_f32 v11, [v11], v3, 3, 1, [m0] ; C82D0703 > v_interp_p1_f32 v0, v2, 0, 2, [m0] ; C8000802 > v_interp_p2_f32 v0, [v0], v3, 0, 2, [m0] ; C8010803 > 
v_interp_p1_f32 v1, v2, 1, 2, [m0] ; C8040902 > v_interp_p2_f32 v1, [v1], v3, 1, 2, [m0] ; C8050903 > v_interp_p1_f32 v4, v2, 2, 2, [m0] ; C8100A02 > v_interp_p2_f32 v4, [v4], v3, 2, 2, [m0] ; C8110A03 > v_interp_p1_f32 v12, v2, 3, 2, [m0] ; C8300B02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s22, s[0:3], 0x4c ; C20B014C > s_buffer_load_dword s9, s[0:3], 0x54 ; C2048154 > v_interp_p2_f32 v12, [v12], v3, 3, 2, [m0] ; C8310B03 > v_interp_p1_f32 v14, v2, 0, 3, [m0] ; C8380C02 > s_buffer_load_dword s6, s[0:3], 0x4d ; C203014D > s_buffer_load_dword s11, s[0:3], 0x55 ; C2058155 > s_buffer_load_dword s23, s[0:3], 0x58 ; C20B8158 > s_buffer_load_dword s20, s[0:3], 0x56 ; C20A0156 > s_buffer_load_dword s21, s[0:3], 0x57 ; C20A8157 > v_interp_p2_f32 v14, [v14], v3, 0, 3, [m0] ; C8390C03 > v_interp_p1_f32 v15, v2, 1, 3, [m0] ; C83C0D02 > s_buffer_load_dword s7, s[0:3], 0x50 ; C2038150 > s_buffer_load_dword s8, s[0:3], 0x51 ; C2040151 > v_interp_p2_f32 v15, [v15], v3, 1, 3, [m0] ; C83D0D03 > v_interp_p1_f32 v16, v2, 0, 4, [m0] ; C8401002 > v_interp_p2_f32 v16, [v16], v3, 0, 4, [m0] ; C8411003 > v_interp_p1_f32 v17, v2, 1, 4, [m0] ; C8441102 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > s_load_dwordx8 s[28:35], s[4:5], 0x10 ; C0CE0510 > s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C > v_interp_p2_f32 v17, [v17], v3, 1, 4, [m0] ; C8451103 > v_interp_p1_f32 v18, v2, 2, 4, [m0] ; C8481202 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s9, v5 ; 10040A09 > v_mov_b32_e32 v19, s22 ; 7E260216 > v_interp_p2_f32 v18, [v18], v3, 2, 4, [m0] ; C8491203 > v_mul_f32_e32 v3, s11, v6 ; 10060C0B > v_fma_f32 v19, v19, s23, v2 ; D2960013 04082F13 > v_mov_b32_e32 v2, s6 ; 7E040206 > v_fma_f32 v20, v2, s23, v3 ; D2960014 040C2F02 > v_mov_b32_e32 v3, s8 ; 7E060208 > v_mul_f32_e32 v8, s21, v8 ; 10101015 > v_mul_f32_e32 v7, s20, v7 ; 100E0E14 > v_mov_b32_e32 v2, s7 ; 7E040207 > v_fma_f32 v21, v2, s23, v7 ; D2960015 041C2F02 > v_fma_f32 v22, v3, s23, v8 ; D2960016 04202F03 > s_load_dwordx8 s[40:47], s[4:5], 0x20 ; C0D40520 > s_load_dwordx4 s[20:23], s[4:5], 0x2c ; C08A052C > s_and_b32 s24, s24, s19 ; 87181318 > s_and_b32 s36, s36, s35 ; 87242324 > image_sample v[2:3], v[19:20], s[12:19], s[24:27] dmask:0xa ; F0800A00 00C30213 > image_sample v19, v[19:20], s[28:35], s[36:39] dmask:0x2 ; F0800200 01271313 > image_sample v[23:24], v[21:22], s[12:19], s[24:27] dmask:0xa ; F0800A00 00C31715 > s_load_dwordx2 s[12:13], s[4:5], 0x34 ; C0460534 > s_load_dwordx2 s[14:15], s[4:5], 0x36 ; C0470536 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s20, s20, s47 ; 87142F14 > image_sample v20, v[21:22], s[28:35], s[36:39] dmask:0x8 ; F0800800 01271415 > image_sample v21, v[5:6], s[40:47], s[20:23] dmask:0x1 ; F0800100 00AA1505 > v_mov_b32_e32 v5, 0 ; 7E0A0280 > buffer_load_format_xyzw v[5:8], v5, s[12:15], 0 idxen ; E00C2000 80030505 > s_waitcnt vmcnt(5) ; BF8C0F75 > v_fma_f32 v2, v2, 2.0, -1.0 ; D2960002 03CDE902 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v6, v24, 2.0, v2 ; D2960006 0409E918 > v_rcp_f32_e32 v2, v11 ; 7E04550B > v_fma_f32 v3, v3, 2.0, -1.0 ; D2960003 03CDE903 > v_cmp_lt_f32_e32 vcc, 0, v9 ; 7C021280 > v_fma_f32 v7, v23, 2.0, v3 ; D2960007 040DE917 > s_load_dwordx8 s[16:23], s[4:5], 0x40 ; C0C80540 > s_load_dwordx4 s[4:7], s[4:5], 0x4c ; C082054C > v_cndmask_b32_e64 v3, v9, 1.0, vcc ; D2000003 01A9E509 > v_mul_f32_e32 v8, v2, v9 ; 10101302 > v_mul_f32_e32 v9, v2, v10 ; 10121502 > v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 > v_bfrev_b32_e32 v2, 14 ; 
7E04708E > v_cndmask_b32_e64 v10, v10, 1.0, vcc ; D200000A 01A9E50A > v_cmp_le_f32_e32 vcc, 0, v3 ; 7C060680 > v_mul_f32_e32 v22, v2, v3 ; 102C0702 > v_bfrev_b32_e32 v3, 15 ; 7E06708F > v_mad_f32 v7, v7, v15, -v15 ; D2820007 843E1F07 > v_add_f32_e32 v6, -1.0, v6 ; 060C0CF3 > v_cndmask_b32_e32 v22, v3, v22 ; 002C2D03 > v_cmp_le_f32_e32 vcc, 0, v10 ; 7C061480 > v_mul_f32_e32 v10, v2, v10 ; 10141502 > v_mac_f32_e32 v7, v6, v14 ; 3E0E1D06 > v_mul_f32_e32 v6, v16, v16 ; 100C2110 > v_mac_f32_e32 v6, v17, v17 ; 3E0C2311 > v_cndmask_b32_e32 v10, v3, v10 ; 00141503 > v_cmp_eq_f32_e32 vcc, 0, v11 ; 7C041680 > s_buffer_load_dword s12, s[0:3], 0x47 ; C2060147 > v_cndmask_b32_e32 v22, v8, v22 ; 002C2D08 > v_mul_f32_e32 v8, v21, v19 ; 10102715 > v_mac_f32_e32 v6, v18, v18 ; 3E0C2512 > v_cndmask_b32_e32 v23, v9, v10 ; 002E1509 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s4, s4, s23 ; 87041704 > v_rsq_clamp_f32_e32 v9, v6 ; 7E125906 > v_mul_f32_e32 v6, v8, v20 ; 100C2908 > s_buffer_load_dword s11, s[0:3], 0x4a ; C205814A > image_sample v10, v[22:23], s[16:23], s[4:7] dmask:0x1 ; F0800100 00240A16 > v_mul_f32_e32 v8, v12, v6 ; 10100D0C > s_buffer_load_dword s7, s[0:3], 0x46 ; C2038146 > s_buffer_load_dword s8, s[0:3], 0x48 ; C2040148 > s_buffer_load_dword s4, s[0:3], 0x40 ; C2020140 > s_buffer_load_dword s5, s[0:3], 0x41 ; C2028141 > s_buffer_load_dword s6, s[0:3], 0x42 ; C2030142 > s_buffer_load_dword s9, s[0:3], 0x44 ; C2048144 > s_buffer_load_dword s0, s[0:3], 0x45 ; C2000145 > v_mul_f32_e32 v8, v8, v8 ; 10101108 > v_mov_b32_e32 v12, 0xbdcccccd ; 7E1802FF BDCCCCCD > v_fma_f32 v12, v8, s12, v12 ; D296000C 04301908 > v_mul_f32_e32 v8, v18, v9 ; 10101312 > v_add_f32_e64 v9, 0, v7 clamp ; D2060809 00020E80 > v_add_f32_e64 v7, 0, v12 clamp ; D2060807 00021880 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v10, v11, v10 ; 0A14150B > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_eq_f32_e64 s[2:3], 0, s11 ; D0040002 00001680 > v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 > v_mul_f32_e32 v10, v7, v10 ; 10141507 > s_and_b64 vcc, exec, s[2:3] ; 87EA027E > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v2, s11 ; 7E04540B > v_mul_f32_e32 v3, v2, v10 ; 10061502 > s_branch BB0_3 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 > v_cndmask_b32_e64 v10, v10, 1.0, vcc ; D200000A 01A9E50A > v_cmp_le_f32_e32 vcc, 0, v10 ; 7C061480 > v_mul_f32_e32 v2, v2, v10 ; 10041502 > v_cndmask_b32_e32 v3, v3, v2 ; 00060503 > v_mul_f32_e32 v2, v8, v9 ; 10041308 > v_mul_f32_e32 v2, s8, v2 ; 10040408 > v_mul_f32_e32 v8, s9, v2 ; 10100409 > v_mul_f32_e32 v9, s0, v2 ; 10120400 > v_mul_f32_e32 v2, s7, v2 ; 10040407 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_mul_f32_e32 v1, v1, v9 ; 10021301 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_fma_f32 v0, s4, v6, v0 ; D2960000 04020C04 > v_fma_f32 v1, s5, v6, v1 ; D2960001 04060C05 > v_fma_f32 v2, s6, v6, v2 ; D2960002 040A0C06 > v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 > v_mul_f32_e32 v0, v5, v0 ; 10000105 > v_mul_f32_e32 v1, v5, v1 ; 10020305 > v_mul_f32_e32 v2, v5, v2 ; 10040505 > v_mul_f32_e32 v3, v7, v3 ; 10060707 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 28 >Code Size: 788 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog 
disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 > v_mov_b32_e32 v16, 0xbe19999a ; 7E2002FF BE19999A > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > s_load_dwordx4 s[4:7], s[10:11], 0x8 ; C0820B08 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[12:15], 0 idxen ; E00C2000 80030A05 > s_buffer_load_dword s0, s[16:19], 0x84 ; C2001184 > s_buffer_load_dword s1, s[16:19], 0x85 ; C2009185 > s_buffer_load_dword s2, s[16:19], 0x86 ; C2011186 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v6, s[4:7], 0 idxen ; E00C2000 80010306 > s_buffer_load_dword s3, s[16:19], 0x55 ; C2019155 > s_buffer_load_dword s5, s[16:19], 0x58 ; C2029158 > s_buffer_load_dword s6, s[16:19], 0x59 ; C2031159 > s_buffer_load_dword s4, s[16:19], 0x56 ; C2021156 > s_buffer_load_dword s15, s[16:19], 0x72 ; C2079172 > s_buffer_load_dword s7, s[16:19], 0x61 ; C2039161 > s_buffer_load_dword s36, s[16:19], 0x6f ; C212116F > v_sub_f32_e32 v0, s0, v7 ; 08000E00 > s_buffer_load_dword s0, s[16:19], 0x54 ; C2001154 > v_sub_f32_e32 v1, s1, v8 ; 08021001 > s_buffer_load_dword s1, s[16:19], 0x5d ; C200915D > s_waitcnt vmcnt(1) ; BF8C0F71 > v_sub_f32_e32 v12, s2, v9 ; 08181202 > s_buffer_load_dword s2, s[16:19], 0x5e ; C201115E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v13, s0, v0 ; 101A0000 > s_buffer_load_dword s0, s[16:19], 0x5c ; C200115C > v_mac_f32_e32 v13, s3, v1 ; 3E1A0203 > v_mac_f32_e32 v13, s4, v12 ; 3E1A1804 > v_mul_f32_e32 v14, s6, v8 ; 101C1006 > v_mac_f32_e32 v14, s5, v7 ; 3E1C0E05 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v15, s0, v0 ; 101E0000 > s_buffer_load_dword s0, s[16:19], 0x5a ; C200115A > v_mul_f32_e32 v0, s5, v0 ; 10000005 > v_mac_f32_e32 v0, s6, v1 ; 3E000206 > v_mac_f32_e32 v15, s1, v1 ; 3E1E0201 > v_mul_f32_e32 v1, v13, v13 ; 10021B0D > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mac_f32_e32 v0, s0, v12 ; 3E001800 > v_mac_f32_e32 v15, s2, v12 ; 3E1E1802 > v_mac_f32_e32 v1, v0, v0 ; 3E020100 > s_buffer_load_dword s3, s[16:19], 0x5b ; C201915B > v_mac_f32_e32 v14, s0, v9 ; 3E1C1200 > v_mac_f32_e32 v1, v15, v15 ; 3E021F0F > s_buffer_load_dword s0, s[16:19], 0x73 ; C2001173 > v_rsq_clamp_f32_e32 v17, v1 ; 7E225901 > s_buffer_load_dword s1, s[16:19], 0x78 ; C2009178 > s_buffer_load_dword s2, s[16:19], 0x79 ; C2011179 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_f32_e32 v1, s3, v14 ; 06021C03 > v_add_f32_e32 v12, s0, v1 ; 06180200 > v_mul_f32_e32 v1, v13, v17 ; 1002230D > v_mul_f32_e32 v1, s1, v1 ; 10020201 > v_mul_f32_e32 v13, v0, v17 ; 101A2300 > s_buffer_load_dword s4, s[16:19], 0x7a ; C202117A > s_buffer_load_dword s1, s[16:19], 0x64 ; C2009164 > v_mad_f32 v1, -v13, s2, -v1 ; D2820001 A404050D > s_buffer_load_dword s2, s[16:19], 0x65 ; C2011165 > v_mul_f32_e32 v13, v15, v17 ; 101A230F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mad_f32 v1, -v13, s4, v1 ; D2820001 2404090D > v_fma_f32 v13, -v0, v17, v16 ; D296000D 24422300 > v_mov_b32_e32 v0, s1 ; 7E000201 > s_buffer_load_dword s0, s[16:19], 0x60 ; C2001160 > v_fma_f32 v0, -s2, v1, v0 ; D2960000 24020202 > v_log_f32_e64 v14, |v0| ; D34E010E 00000100 > v_mov_b32_e32 v16, 0xbfc00000 ; 7E2002FF BFC00000 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_eq_f32_e64 s[2:3], 0, s0 ; D0040002 00000080 > s_and_b64 vcc, exec, 
s[2:3] ; 87EA027E > s_waitcnt vmcnt(0) ; BF8C0F70 > s_cbranch_vccnz BB0_2 ; BF870000 > v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 > v_cmp_gt_f32_e64 vcc, |s0|, v0 ; D008016A 00020000 > v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 > v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 > v_mul_f32_e32 v15, s0, v0 ; 101E0000 > v_rcp_f32_e32 v15, v15 ; 7E1E550F > v_mul_f32_e64 v0, v0, -v15 ; D2100000 40021F00 > v_mul_f32_e32 v0, v0, v12 ; 10001900 > s_branch BB0_3 ; BF820000 > v_bfrev_b32_e32 v0, 1 ; 7E007081 > v_xor_b32_e32 v15, v12, v0 ; 3A1E010C > v_cmp_lt_f32_e32 vcc, v12, v0 ; 7C02010C > v_cndmask_b32_e64 v0, v15, 1.0, vcc ; D2000000 01A9E50F > v_cmp_le_f32_e32 vcc, 0, v0 ; 7C060080 > v_mul_f32_e32 v0, 0x70000000, v0 ; 100000FF 70000000 > v_bfrev_b32_e32 v15, 15 ; 7E1E708F > v_cndmask_b32_e32 v0, v15, v0 ; 0000010F > s_buffer_load_dword s29, s[16:19], 0x44 ; C20E9144 > s_buffer_load_dword s33, s[16:19], 0x45 ; C2109145 > s_buffer_load_dword s30, s[16:19], 0x46 ; C20F1146 > s_buffer_load_dword s31, s[16:19], 0x47 ; C20F9147 > s_buffer_load_dword s26, s[16:19], 0x48 ; C20D1148 > s_buffer_load_dword s32, s[16:19], 0x49 ; C2101149 > s_buffer_load_dword s27, s[16:19], 0x4a ; C20D914A > s_buffer_load_dword s8, s[16:19], 0x4b ; C204114B > s_buffer_load_dword s23, s[16:19], 0x4c ; C20B914C > s_buffer_load_dword s28, s[16:19], 0x4d ; C20E114D > s_buffer_load_dword s24, s[16:19], 0x4e ; C20C114E > s_buffer_load_dword s9, s[16:19], 0x4f ; C204914F > s_buffer_load_dword s20, s[16:19], 0x50 ; C20A1150 > s_buffer_load_dword s25, s[16:19], 0x51 ; C20C9151 > s_buffer_load_dword s21, s[16:19], 0x52 ; C20A9152 > s_buffer_load_dword s22, s[16:19], 0x53 ; C20B1153 > s_buffer_load_dword s5, s[16:19], 0x62 ; C2029162 > s_buffer_load_dword s3, s[16:19], 0x63 ; C2019163 > s_buffer_load_dword s40, s[16:19], 0x66 ; C2141166 > s_buffer_load_dword s34, s[16:19], 0x67 ; C2111167 > s_buffer_load_dword s4, s[16:19], 0x68 ; C2021168 > s_buffer_load_dword s35, s[16:19], 0x69 ; C2119169 > s_buffer_load_dword s0, s[16:19], 0x6c ; C200116C > s_buffer_load_dword s1, s[16:19], 0x6d ; C200916D > s_buffer_load_dword s2, s[16:19], 0x6e ; C201116E > s_buffer_load_dword s12, s[16:19], 0x74 ; C2061174 > s_buffer_load_dword s13, s[16:19], 0x75 ; C2069175 > s_buffer_load_dword s14, s[16:19], 0x76 ; C2071176 > s_buffer_load_dword s6, s[16:19], 0x77 ; C2031177 > s_buffer_load_dword s37, s[16:19], 0x7c ; C212917C > s_buffer_load_dword s38, s[16:19], 0x7d ; C213117D > s_buffer_load_dword s39, s[16:19], 0x7e ; C213917E > s_buffer_load_dword s41, s[16:19], 0x7f ; C214917F > s_buffer_load_dword s42, s[16:19], 0x80 ; C2151180 > s_buffer_load_dword s43, s[16:19], 0x81 ; C2159181 > s_buffer_load_dword s44, s[16:19], 0x82 ; C2161182 > s_buffer_load_dword s10, s[16:19], 0x88 ; C2051188 > s_buffer_load_dword s11, s[16:19], 0x89 ; C2059189 > v_cmp_eq_f32_e64 s[16:17], 0, s36 ; D0040010 00004880 > v_sub_f32_e32 v15, 1.0, v13 ; 081E1AF2 > v_mul_f32_e32 v14, v14, v16 ; 101C210E > v_mul_f32_e32 v17, 0x3fb8aa3b, v0 ; 102200FF 3FB8AA3B > s_and_b64 vcc, exec, s[16:17] ; 87EA107E > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_5 ; BF870000 > v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 > v_cmp_gt_f32_e64 vcc, |s36|, v0 ; D008016A 00020024 > v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 > v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 > v_mul_f32_e32 v13, s36, v0 ; 101A0024 > v_rcp_f32_e32 v13, v13 ; 7E1A550D > v_mul_f32_e64 v0, v0, -v13 ; D2100000 40021B00 > v_mul_f32_e32 v13, v0, v12 ; 101A1900 > s_branch BB0_6 ; 
BF820000 > v_bfrev_b32_e32 v0, 1 ; 7E007081 > v_xor_b32_e32 v13, v12, v0 ; 3A1A010C > v_cmp_lt_f32_e32 vcc, v12, v0 ; 7C02010C > v_cndmask_b32_e64 v0, v13, 1.0, vcc ; D2000000 01A9E50D > v_cmp_le_f32_e32 vcc, 0, v0 ; 7C060080 > v_mul_f32_e32 v0, 0x70000000, v0 ; 100000FF 70000000 > v_bfrev_b32_e32 v13, 15 ; 7E1A708F > v_cndmask_b32_e32 v13, v13, v0 ; 001A010D > v_cmp_eq_f32_e64 s[16:17], 0, s7 ; D0040010 00000E80 > v_mov_b32_e32 v16, s40 ; 7E200228 > v_mov_b32_e32 v0, s41 ; 7E000229 > v_mov_b32_e32 v18, s42 ; 7E24022A > v_mov_b32_e32 v19, s43 ; 7E26022B > v_mov_b32_e32 v20, s44 ; 7E28022C > v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 > v_exp_f32_e32 v14, v14 ; 7E1C4B0E > v_sub_f32_e32 v12, s15, v12 ; 0818180F > v_exp_f32_e32 v17, v17 ; 7E224B11 > s_and_b64 vcc, exec, s[16:17] ; 87EA107E > s_cbranch_vccnz BB0_8 ; BF870000 > v_rcp_f32_e32 v21, s7 ; 7E2A5407 > v_mul_f32_e32 v12, v21, v12 ; 10181915 > s_branch BB0_9 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 > v_cndmask_b32_e64 v12, v12, 1.0, vcc ; D200000C 01A9E50C > v_cmp_le_f32_e32 vcc, 0, v12 ; 7C061880 > v_mul_f32_e32 v12, 0x70000000, v12 ; 101818FF 70000000 > v_bfrev_b32_e32 v21, 15 ; 7E2A708F > v_cndmask_b32_e32 v12, v21, v12 ; 00181915 > v_mul_f32_e32 v21, s33, v8 ; 102A1021 > v_mul_f32_e32 v22, s32, v8 ; 102C1020 > exp 15, 32, 0, 0, 0, v10, v11, v0, v0 ; F800020F 00000B0A > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v10, s28, v8 ; 1014101C > v_mul_f32_e32 v8, s25, v8 ; 10101019 > v_mac_f32_e32 v8, s20, v7 ; 3E100E14 > v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D > v_mac_f32_e32 v22, s26, v7 ; 3E2C0E1A > v_mac_f32_e32 v10, s23, v7 ; 3E140E17 > v_mac_f32_e32 v8, s21, v9 ; 3E101215 > v_mac_f32_e32 v21, s30, v9 ; 3E2A121E > v_mac_f32_e32 v22, s27, v9 ; 3E2C121B > v_mac_f32_e32 v10, s24, v9 ; 3E141218 > v_fma_f32 v9, v14, v16, -s5 ; D2960009 8016210E > v_add_f32_e32 v7, s22, v8 ; 060E1016 > v_mul_f32_e32 v11, v16, v14 ; 10161D10 > v_mov_b32_e32 v14, 0x37a7c5ac ; 7E1C02FF 37A7C5AC > v_mul_f32_e64 v14, |v7|, v14 ; D210010E 00021D07 > v_add_f32_e32 v17, s34, v17 ; 06222222 > v_min_f32_e32 v14, 1.0, v14 ; 1E1C1CF2 > v_mul_f32_e32 v17, s35, v17 ; 10222223 > v_max_f32_e32 v9, 0, v9 ; 20121280 > v_sub_f32_e32 v14, 1.0, v14 ; 081C1CF2 > v_fma_f32 v9, -v9, v14, v11 ; D2960009 242E1D09 > v_mul_f32_e32 v8, v15, v15 ; 10101F0F > v_mul_f32_e32 v11, 0.5, v17 ; 101622F0 > v_mul_f32_e32 v8, v11, v8 ; 1010110B > v_min_f32_e32 v8, s3, v8 ; 1E101003 > v_mul_f32_e32 v11, 0x3fb8aa3b, v13 ; 10161AFF 3FB8AA3B > v_exp_f32_e32 v11, v11 ; 7E164B0B > v_fma_f32 v1, v1, v1, 1.0 ; D2960001 03CA0301 > v_max_f32_e32 v8, s4, v8 ; 20101004 > v_max_f32_e32 v9, s6, v9 ; 20121206 > v_fma_f32 v16, s0, v11, v8 ; D2960010 04221600 > v_mul_f32_e32 v1, 0x3d747645, v1 ; 100202FF 3D747645 > v_mul_f32_e32 v9, v9, v8 ; 10121109 > v_mul_f32_e32 v13, s0, v11 ; 101A1600 > v_mul_f32_e32 v14, s1, v11 ; 101C1601 > v_fma_f32 v17, s1, v11, v8 ; D2960011 04221601 > v_mul_f32_e32 v15, s2, v11 ; 101E1602 > v_fma_f32 v8, s2, v11, v8 ; D2960008 04221602 > v_fma_f32 v11, v13, v1, v9 ; D296000B 0426030D > v_fma_f32 v13, v14, v1, v9 ; D296000D 0426030E > v_rcp_f32_e32 v14, v16 ; 7E1C5510 > v_cmp_lt_f32_e64 s[4:5], 0, v11 ; D0020004 00021680 > v_fma_f32 v1, v15, v1, v9 ; D2960001 0426030F > v_cndmask_b32_e64 v9, v11, 1.0, s[4:5] ; D2000009 0011E50B > v_bfrev_b32_e32 v15, 14 ; 7E1E708E > v_cmp_le_f32_e64 s[4:5], 0, v9 ; D0060004 00021280 > v_mul_f32_e32 v9, v15, v9 ; 1012130F > v_bfrev_b32_e32 v23, 15 ; 7E2E708F > v_mul_f32_e32 v11, v14, v11 ; 1016170E > v_cndmask_b32_e64 v9, v23, v9, 
s[4:5] ; D2000009 00121317 > v_cmp_eq_f32_e32 vcc, 0, v16 ; 7C042080 > v_rcp_f32_e32 v14, v17 ; 7E1C5511 > v_cndmask_b32_e32 v9, v11, v9 ; 0012130B > v_rcp_f32_e32 v11, v8 ; 7E165508 > v_cmp_lt_f32_e64 s[6:7], 0, v13 ; D0020006 00021A80 > v_cmp_lt_f32_e64 s[4:5], 0, v1 ; D0020004 00020280 > v_mul_f32_e32 v14, v14, v13 ; 101C1B0E > v_cndmask_b32_e64 v13, v13, 1.0, s[6:7] ; D200000D 0019E50D > v_mul_f32_e32 v11, v11, v1 ; 1016030B > v_cndmask_b32_e64 v1, v1, 1.0, s[4:5] ; D2000001 0011E501 > v_cmp_le_f32_e32 vcc, 0, v13 ; 7C061A80 > v_mul_f32_e32 v13, v15, v13 ; 101A1B0F > v_cmp_le_f32_e64 s[4:5], 0, v1 ; D0060004 00020280 > v_mul_f32_e32 v1, v15, v1 ; 1002030F > v_cmp_eq_f32_e64 s[0:1], 0, v17 ; D0040000 00022280 > v_cndmask_b32_e32 v13, v23, v13 ; 001A1B17 > v_cmp_eq_f32_e64 s[2:3], 0, v8 ; D0040002 00021080 > v_cndmask_b32_e64 v1, v23, v1, s[4:5] ; D2000001 00120317 > v_cndmask_b32_e64 v13, v14, v13, s[0:1] ; D200000D 00021B0E > v_cndmask_b32_e64 v1, v11, v1, s[2:3] ; D2000001 000A030B > v_mul_f32_e32 v11, v12, v16 ; 1016210C > v_mul_f32_e32 v14, v12, v17 ; 101C230C > v_mov_b32_e32 v24, 0xbfb8aa3b ; 7E3002FF BFB8AA3B > v_mul_f32_e32 v12, v12, v8 ; 1018110C > v_mul_f32_e64 v16, v16, |v7| ; D2100210 00020F10 > v_mul_f32_e64 v17, v17, |v7| ; D2100211 00020F11 > v_mul_f32_e64 v8, v8, |v7| ; D2100208 00020F08 > v_mul_f32_e32 v16, v24, v16 ; 10202118 > v_mul_f32_e32 v17, v24, v17 ; 10222318 > v_mul_f32_e32 v8, v24, v8 ; 10101118 > v_mul_f32_e32 v11, v24, v11 ; 10161718 > v_mul_f32_e32 v14, v24, v14 ; 101C1D18 > v_mul_f32_e32 v12, v24, v12 ; 10181918 > v_log_f32_e32 v24, s12 ; 7E304E0C > v_log_f32_e32 v25, s13 ; 7E324E0D > v_log_f32_e32 v26, s14 ; 7E344E0E > v_mov_b32_e32 v27, 0x3ee8ba2f ; 7E3602FF 3EE8BA2F > v_mul_f32_e32 v24, v27, v24 ; 1030311B > v_mul_f32_e32 v25, v27, v25 ; 1032331B > v_exp_f32_e32 v24, v24 ; 7E304B18 > v_exp_f32_e32 v11, v11 ; 7E164B0B > v_mul_f32_e32 v11, v24, v11 ; 10161718 > v_mul_f32_e32 v26, v27, v26 ; 1034351B > v_exp_f32_e32 v24, v25 ; 7E304B19 > v_exp_f32_e32 v14, v14 ; 7E1C4B0E > v_mul_f32_e32 v14, v24, v14 ; 101C1D18 > v_exp_f32_e32 v24, v26 ; 7E304B1A > v_exp_f32_e32 v12, v12 ; 7E184B0C > v_mul_f32_e32 v12, v24, v12 ; 10181918 > v_exp_f32_e32 v16, v16 ; 7E204B10 > v_exp_f32_e32 v17, v17 ; 7E224B11 > v_exp_f32_e32 v8, v8 ; 7E104B08 > v_mul_f32_e32 v9, v11, v9 ; 1012130B > v_mul_f32_e32 v11, v14, v13 ; 10161B0E > v_mul_f32_e32 v1, v12, v1 ; 1002030C > v_sub_f32_e32 v13, 1.0, v17 ; 081A22F2 > v_mov_b32_e32 v14, 0xbb83126f ; 7E1C02FF BB83126F > v_sub_f32_e32 v12, 1.0, v16 ; 081820F2 > v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 > v_fma_f32 v11, v11, v13, v14 ; D296000B 043A1B0B > v_fma_f32 v9, v9, v12, v14 ; D2960009 043A1909 > v_fma_f32 v1, v1, v8, v14 ; D2960001 043A1101 > v_max_f32_e32 v8, 0, v9 ; 20101280 > v_max_f32_e32 v9, 0, v11 ; 20121680 > v_mov_b32_e32 v11, 0x40c66666 ; 7E1602FF 40C66666 > v_mov_b32_e32 v13, 0x3fd9999a ; 7E1A02FF 3FD9999A > v_fma_f32 v12, v8, v11, 0.5 ; D296000C 03C21708 > v_fma_f32 v14, v8, v11, v13 ; D296000E 04361708 > v_mov_b32_e32 v17, 0x3d75c28f ; 7E2202FF 3D75C28F > v_mul_f32_e32 v12, v12, v8 ; 1018110C > v_fma_f32 v8, v8, v14, v17 ; D2960008 04461D08 > v_fma_f32 v14, v9, v11, 0.5 ; D296000E 03C21709 > v_mul_f32_e32 v14, v14, v9 ; 101C130E > v_max_f32_e32 v1, 0, v1 ; 20020280 > v_fma_f32 v24, v9, v11, v13 ; D2960018 04361709 > v_fma_f32 v13, v1, v11, v13 ; D296000D 04361701 > v_fma_f32 v11, v1, v11, 0.5 ; D296000B 03C21701 > v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 > v_cmp_lt_f32_e64 s[0:1], 0, v14 ; D0020000 00021C80 > v_mul_f32_e32 v11, 
v11, v1 ; 1016030B > v_fma_f32 v1, v1, v13, v17 ; D2960001 04461B01 > v_cndmask_b32_e64 v13, v12, 1.0, vcc ; D200000D 01A9E50C > v_fma_f32 v9, v9, v24, v17 ; D2960009 04463109 > v_cndmask_b32_e64 v17, v14, 1.0, s[0:1] ; D2000011 0001E50E > v_cmp_eq_f32_e64 s[4:5], 0, v8 ; D0040004 00021080 > v_rcp_f32_e32 v8, v8 ; 7E105508 > v_cmp_le_f32_e32 vcc, 0, v13 ; 7C061A80 > v_mul_f32_e32 v13, v15, v13 ; 101A1B0F > v_cmp_le_f32_e64 s[0:1], 0, v17 ; D0060000 00022280 > v_mul_f32_e32 v17, v15, v17 ; 1022230F > v_cmp_lt_f32_e64 s[2:3], 0, v11 ; D0020002 00021680 > v_cndmask_b32_e64 v24, v11, 1.0, s[2:3] ; D2000018 0009E50B > v_cndmask_b32_e32 v13, v23, v13 ; 001A1B17 > v_cndmask_b32_e64 v17, v23, v17, s[0:1] ; D2000011 00022317 > v_cmp_eq_f32_e32 vcc, 0, v9 ; 7C041280 > v_rcp_f32_e32 v9, v9 ; 7E125509 > v_cmp_eq_f32_e64 s[0:1], 0, v1 ; D0040000 00020280 > v_rcp_f32_e32 v1, v1 ; 7E025501 > v_cmp_le_f32_e64 s[2:3], 0, v24 ; D0060002 00023080 > v_mul_f32_e32 v15, v15, v24 ; 101E310F > v_mul_f32_e32 v8, v8, v12 ; 10101908 > v_cndmask_b32_e64 v15, v23, v15, s[2:3] ; D200000F 000A1F17 > v_add_f32_e32 v21, s31, v21 ; 062A2A1F > v_mul_f32_e32 v23, s10, v7 ; 102E0E0A > v_add_f32_e32 v22, s8, v22 ; 062C2C08 > v_mul_f32_e32 v12, s11, v7 ; 10180E0B > v_add_f32_e32 v10, s9, v10 ; 06141409 > v_fma_f32 v23, v21, s10, v23 ; D2960017 045C1515 > v_fma_f32 v12, v22, -s11, v12 ; D296000C 44301716 > v_mul_f32_e32 v1, v1, v11 ; 10021701 > v_mul_f32_e32 v18, s37, v18 ; 10242425 > v_mul_f32_e32 v19, s38, v19 ; 10262626 > v_mul_f32_e32 v20, s39, v20 ; 10282827 > exp 15, 33, 0, 0, 0, v23, v12, v10, v7 ; F800021F 070A0C17 > v_mul_f32_e32 v9, v9, v14 ; 10121D09 > exp 15, 34, 0, 0, 0, v18, v19, v20, v0 ; F800022F 00141312 > v_cndmask_b32_e64 v8, v8, v13, s[4:5] ; D2000008 00121B08 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cndmask_b32_e32 v0, v9, v17 ; 00002309 > v_cndmask_b32_e64 v1, v1, v15, s[0:1] ; D2000001 00021F01 > exp 15, 35, 0, 0, 0, v8, v0, v1, v16 ; F800023F 10010008 > exp 15, 36, 0, 0, 0, v3, v4, v5, v6 ; F800024F 06050403 > exp 15, 12, 0, 1, 0, v21, v22, v10, v7 ; F80008CF 070A1615 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 28 >Code Size: 1832 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v17, v2, 0, 1, [m0] ; C8440402 > v_interp_p2_f32 v17, [v17], v3, 0, 1, [m0] ; C8450403 > v_interp_p1_f32 v18, v2, 1, 1, [m0] ; C8480502 > v_interp_p2_f32 v18, [v18], v3, 1, 1, [m0] ; C8490503 > v_interp_p1_f32 v19, v2, 3, 1, [m0] ; C84C0702 > v_interp_p2_f32 v19, [v19], v3, 3, 1, [m0] ; C84D0703 > v_interp_p1_f32 v4, v2, 0, 2, [m0] ; C8100802 > v_interp_p2_f32 v4, [v4], v3, 0, 2, [m0] ; C8110803 > v_interp_p1_f32 v5, v2, 1, 2, [m0] ; C8140902 > v_interp_p2_f32 v5, [v5], v3, 1, 2, [m0] ; C8150903 > v_interp_p1_f32 v7, v2, 2, 2, [m0] ; C81C0A02 > v_interp_p2_f32 v7, [v7], v3, 2, 2, [m0] ; C81D0A03 > v_interp_p1_f32 v6, v2, 3, 2, [m0] ; C8180B02 > v_interp_p2_f32 v6, [v6], v3, 3, 2, [m0] ; C8190B03 > v_interp_p1_f32 v8, v2, 0, 3, [m0] ; C8200C02 > v_interp_p2_f32 v8, [v8], v3, 0, 3, [m0] ; C8210C03 > v_interp_p1_f32 v9, v2, 1, 3, [m0] ; C8240D02 > v_interp_p2_f32 v9, [v9], v3, 1, 3, [m0] ; C8250D03 
> v_interp_p1_f32 v10, v2, 2, 3, [m0] ; C8280E02 > v_interp_p2_f32 v10, [v10], v3, 2, 3, [m0] ; C8290E03 > v_interp_p1_f32 v11, v2, 3, 3, [m0] ; C82C0F02 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_interp_p2_f32 v11, [v11], v3, 3, 3, [m0] ; C82D0F03 > v_interp_p1_f32 v14, v2, 0, 4, [m0] ; C8381002 > v_interp_p2_f32 v14, [v14], v3, 0, 4, [m0] ; C8391003 > v_interp_p1_f32 v15, v2, 1, 4, [m0] ; C83C1102 > v_interp_p2_f32 v15, [v15], v3, 1, 4, [m0] ; C83D1103 > v_interp_p1_f32 v12, v2, 2, 4, [m0] ; C8301202 > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > v_rcp_f32_e32 v22, v19 ; 7E2C5513 > v_interp_p2_f32 v12, [v12], v3, 2, 4, [m0] ; C8311203 > v_interp_p1_f32 v2, v2, 3, 4, [m0] ; C8081302 > v_cmp_lt_f32_e64 s[0:1], 0, v17 ; D0020000 00022280 > v_cmp_lt_f32_e64 s[2:3], 0, v18 ; D0020002 00022480 > v_cndmask_b32_e64 v16, v17, 1.0, s[0:1] ; D2000010 0001E511 > v_cndmask_b32_e64 v20, v18, 1.0, s[2:3] ; D2000014 0009E512 > v_interp_p2_f32 v2, [v2], v3, 3, 4, [m0] ; C8091303 > v_bfrev_b32_e32 v3, 14 ; 7E06708E > v_cmp_le_f32_e64 s[2:3], 0, v20 ; D0060002 00022880 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[12:15], 0x3e ; C2038D3E > v_cmp_le_f32_e64 s[0:1], 0, v16 ; D0060000 00022080 > v_mul_f32_e32 v21, v3, v16 ; 102A2103 > v_bfrev_b32_e32 v16, 15 ; 7E20708F > v_mul_f32_e32 v20, v3, v20 ; 10282903 > v_cmp_eq_f32_e32 vcc, 0, v19 ; 7C042680 > v_cndmask_b32_e64 v21, v16, v21, s[0:1] ; D2000015 00022B10 > v_cndmask_b32_e64 v20, v16, v20, s[2:3] ; D2000014 000A2910 > v_mul_f32_e32 v17, v22, v17 ; 10222316 > v_mul_f32_e32 v18, v22, v18 ; 10242516 > s_load_dwordx4 s[0:3], s[4:5], 0x1c ; C080051C > s_buffer_load_dword s6, s[12:15], 0x3f ; C2030D3F > v_cndmask_b32_e32 v21, v17, v21 ; 002A2B11 > v_cndmask_b32_e32 v22, v18, v20 ; 002C2912 > s_and_b32 s24, s24, s23 ; 87181718 > image_sample v17, v[21:22], s[16:23], s[24:27] dmask:0x1 ; F0800100 00C41115 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v17, v17, v19 ; 0A222711 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_cmp_eq_f32_e64 s[8:9], 0, s7 ; D0040008 00000E80 > v_and_b32_e32 v18, 0x7fffffff, v17 ; 362422FF 7FFFFFFF > s_and_b64 vcc, exec, s[8:9] ; 87EA087E > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v3, s7 ; 7E065407 > v_mul_f32_e32 v3, v3, v18 ; 10062503 > s_branch BB0_3 ; BF820000 > v_cmp_lg_f32_e32 vcc, 0, v17 ; 7C0A2280 > v_cndmask_b32_e64 v17, v18, 1.0, vcc ; D2000011 01A9E512 > v_cmp_le_f32_e32 vcc, 0, v17 ; 7C062280 > v_mul_f32_e32 v3, v3, v17 ; 10062303 > v_cndmask_b32_e32 v3, v16, v3 ; 00060710 > s_load_dwordx8 s[12:19], s[4:5], 0x10 ; C0C60510 > s_load_dwordx2 s[20:21], s[4:5], 0x24 ; C04A0524 > s_load_dwordx2 s[22:23], s[4:5], 0x26 ; C04B0526 > v_mov_b32_e32 v16, 0 ; 7E200280 > v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > image_sample v[17:20], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00031100 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v14, v17 ; 1000230E > v_mul_f32_e32 v1, v15, v18 ; 1002250F > buffer_load_format_xyzw v[14:17], v16, s[20:23], 0 idxen ; E00C2000 80050E10 > v_sub_f32_e32 v3, 1.0, v3 ; 080606F2 > v_log_f32_e32 v3, v3 ; 7E064F03 > v_mul_f32_e32 v12, v12, v19 ; 1018270C > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_mul_f32_e32 v1, v5, v1 ; 10020305 > v_mul_f32_e32 v3, s6, v3 ; 10060606 > v_exp_f32_e32 v3, v3 ; 7E064B03 > v_mad_f32 v3, -v3, v20, v20 ; D2820003 24522903 > v_mul_f32_e32 v4, v7, v12 ; 10081907 > v_mul_f32_e32 v2, v2, v3 ; 10040702 > v_fma_f32 v0, 
v0, v11, v8 ; D2960000 04221700 > v_fma_f32 v1, v1, v11, v9 ; D2960001 04261701 > v_mul_f32_e32 v2, v6, v2 ; 10040506 > v_fma_f32 v4, v4, v11, v10 ; D2960004 042A1704 > v_mul_f32_e32 v3, v11, v2 ; 1006050B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v0, v14 ; 10001D00 > v_mul_f32_e32 v1, v1, v14 ; 10021D01 > v_mul_f32_e32 v2, v4, v14 ; 10041D04 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 24 >Code Size: 532 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >radeonsi: Compiling shader 393 >Vertex Shader Prolog LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { >main_body: > %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 > %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 > %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 > %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 > %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 > %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %23, i32 %5, 5 > %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 > %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 > %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 > %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 > %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 > %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 > %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float, float, float, float }> %30, i32 %12, 12 > %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 > %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 > %34 = bitcast i32 %15 to float > %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 > %36 = bitcast i32 %16 to float > %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %35, float %36, 16 > %38 = bitcast i32 %17 to float > %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %37, float %38, 17 > %40 = bitcast i32 %18 to float > %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %39, float %40, 18 > %42 = add i32 %15, %12 > %43 = bitcast i32 %42 to float > %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %41, float %43, 19 > %45 = add i32 %15, %12 > %46 = bitcast i32 %45 to float > %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %44, float %46, 20 > %48 = add i32 %15, %12 > %49 = bitcast i32 %48 to float > %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %47, float %49, 21 > %51 = add i32 %15, %12 > %52 = bitcast i32 %51 to float > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %50, float %52, 22 > %54 = add i32 %15, %12 > %55 = bitcast i32 %54 to float > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %53, float %55, 23 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %56 >} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > s_load_dwordx4 s[8:11], s[10:11], 0x10 ; C0840B10 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[29:32], v4, s[4:7], 0 idxen ; E00C2000 80011D04 > buffer_load_format_xyzw v[9:12], v5, s[12:15], 0 idxen ; E00C2000 80030905 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[32:35], v7, s[20:23], 0 idxen ; E00C2000 80052007 > s_waitcnt vmcnt(2) ; 
BF8C0F72 > buffer_load_format_xyzw v[12:15], v8, s[8:11], 0 idxen ; E00C2000 80020C08 > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s34, s[4:7], 0x98 ; C2110598 > s_buffer_load_dword s32, s[4:7], 0x99 ; C2100599 > s_buffer_load_dword s33, s[4:7], 0x9a ; C210859A > s_buffer_load_dword s39, s[4:7], 0x53 ; C2138553 > s_buffer_load_dword s40, s[4:7], 0x80 ; C2140580 > s_buffer_load_dword s41, s[4:7], 0x81 ; C2148581 > s_buffer_load_dword s42, s[4:7], 0x82 ; C2150582 > s_buffer_load_dword s43, s[4:7], 0x83 ; C2158583 > s_buffer_load_dword s44, s[4:7], 0xb4 ; C21605B4 > s_buffer_load_dword s45, s[4:7], 0xb5 ; C21685B5 > s_buffer_load_dword s46, s[4:7], 0xb6 ; C21705B6 > s_buffer_load_dword s47, s[4:7], 0xb7 ; C21785B7 > s_buffer_load_dword s21, s[4:7], 0x51 ; C20A8551 > s_buffer_load_dword s30, s[4:7], 0x52 ; C20F0552 > s_buffer_load_dword s27, s[4:7], 0x65 ; C20D8565 > s_buffer_load_dword s28, s[4:7], 0x78 ; C20E0578 > s_buffer_load_dword s29, s[4:7], 0x79 ; C20E8579 > s_buffer_load_dword s22, s[4:7], 0x7c ; C20B057C > s_buffer_load_dword s23, s[4:7], 0x7d ; C20B857D > s_buffer_load_dword s24, s[4:7], 0x7e ; C20C057E > s_buffer_load_dword s25, s[4:7], 0x7f ; C20C857F > s_buffer_load_dword s26, s[4:7], 0x84 ; C20D0584 > s_buffer_load_dword s8, s[4:7], 0x88 ; C2040588 > s_buffer_load_dword s15, s[4:7], 0x89 ; C2078589 > s_buffer_load_dword s9, s[4:7], 0x8a ; C204858A > s_buffer_load_dword s10, s[4:7], 0x8b ; C205058B > s_buffer_load_dword s14, s[4:7], 0x8d ; C207058D > s_buffer_load_dword s17, s[4:7], 0x90 ; C2088590 > s_buffer_load_dword s20, s[4:7], 0x91 ; C20A0591 > s_buffer_load_dword s18, s[4:7], 0x92 ; C2090592 > s_buffer_load_dword s19, s[4:7], 0x93 ; C2098593 > s_buffer_load_dword s11, s[4:7], 0x94 ; C2058594 > s_buffer_load_dword s16, s[4:7], 0x95 ; C2080595 > s_buffer_load_dword s12, s[4:7], 0x96 ; C2060596 > s_buffer_load_dword s13, s[4:7], 0x97 ; C2068597 > s_buffer_load_dword s31, s[4:7], 0xa2 ; C20F85A2 > s_buffer_load_dword s37, s[4:7], 0x9c ; C212859C > s_buffer_load_dword s35, s[4:7], 0x9d ; C211859D > s_buffer_load_dword s36, s[4:7], 0x9e ; C212059E > s_buffer_load_dword s38, s[4:7], 0xa0 ; C21305A0 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v14 ; 10021D00 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_mul_f32_e32 v6, v0, v13 ; 100C1B00 > v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 > v_mul_f32_e32 v0, v0, v12 ; 10001900 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_or_b32_e32 v12, 28, v1 ; 3818029C > v_or_b32_e32 v13, 20, v1 ; 381A0294 > v_or_b32_e32 v14, 24, v1 ; 381C0298 > v_lshlrev_b32_e32 v6, 5, v6 ; 340C0C85 > buffer_load_dword v20, v13, s[0:3], 0 offen ; E0301000 8000140D > buffer_load_dword v19, v12, s[0:3], 0 offen ; E0301000 8000130C > buffer_load_dword v21, v14, s[0:3], 0 offen ; E0301000 8000150E > v_or_b32_e32 v8, 16, v1 ; 38100290 > buffer_load_dword v18, v8, s[0:3], 0 offen ; E0301000 80001208 > v_or_b32_e32 v8, 16, v6 ; 38100C90 > v_or_b32_e32 v12, 28, v6 ; 38180C9C > v_or_b32_e32 v13, 20, v6 ; 381A0C94 > v_or_b32_e32 v14, 24, v6 ; 381C0C98 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > buffer_load_dword v25, v13, s[0:3], 0 offen ; E0301000 8000190D > buffer_load_dword v23, v12, s[0:3], 0 offen ; E0301000 8000170C > v_or_b32_e32 v7, 4, v1 ; 380E0284 > v_or_b32_e32 v15, 8, v1 ; 381E0288 > buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 > v_or_b32_e32 v1, 4, v6 ; 
38020C84 > buffer_load_dword v22, v8, s[0:3], 0 offen ; E0301000 80001608 > buffer_load_dword v26, v14, s[0:3], 0 offen ; E0301000 80001A0E > buffer_load_dword v36, v1, s[0:3], 0 offen ; E0301000 80002401 > v_or_b32_e32 v8, 16, v0 ; 38100090 > buffer_load_dword v24, v6, s[0:3], 0 offen ; E0301000 80001806 > v_or_b32_e32 v16, 8, v6 ; 38200C88 > v_or_b32_e32 v6, 4, v0 ; 380C0084 > v_or_b32_e32 v14, 24, v0 ; 381C0098 > buffer_load_dword v28, v7, s[0:3], 0 offen ; E0301000 80001C07 > buffer_load_dword v38, v8, s[0:3], 0 offen ; E0301000 80002608 > v_or_b32_e32 v12, 28, v0 ; 3818009C > v_or_b32_e32 v13, 20, v0 ; 381A0094 > buffer_load_dword v37, v6, s[0:3], 0 offen ; E0301000 80002506 > buffer_load_dword v41, v14, s[0:3], 0 offen ; E0301000 8000290E > buffer_load_dword v40, v13, s[0:3], 0 offen ; E0301000 8000280D > buffer_load_dword v39, v12, s[0:3], 0 offen ; E0301000 8000270C > buffer_load_dword v42, v15, s[0:3], 0 offen ; E0301000 80002A0F > v_or_b32_e32 v27, 8, v0 ; 38360088 > buffer_load_dword v43, v16, s[0:3], 0 offen ; E0301000 80002B10 > buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B > buffer_load_dword v35, v0, s[0:3], 0 offen ; E0301000 80002300 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e64 v14, s34, s34 ; D210000E 00004422 > v_mac_f32_e64 v14, s32, s32 ; D23E000E 00004020 > s_buffer_load_dword s0, s[4:7], 0x4d ; C200054D > s_buffer_load_dword s1, s[4:7], 0x8c ; C200858C > s_buffer_load_dword s2, s[4:7], 0x8e ; C201058E > s_buffer_load_dword s3, s[4:7], 0x8f ; C201858F > s_buffer_load_dword s4, s[4:7], 0xa1 ; C20205A1 > v_mac_f32_e64 v14, s33, s33 ; D23E000E 00004221 > v_sqrt_f32_e32 v14, v14 ; 7E1C670E > v_mul_f32_e32 v16, s39, v14 ; 10201C27 > v_cmp_eq_f32_e32 vcc, 0, v16 ; 7C042080 > v_mov_b32_e32 v0, s40 ; 7E000228 > v_mov_b32_e32 v1, s41 ; 7E020229 > v_mov_b32_e32 v6, s42 ; 7E0C022A > v_mov_b32_e32 v7, s43 ; 7E0E022B > v_mov_b32_e32 v12, s44 ; 7E18022C > v_mov_b32_e32 v13, s45 ; 7E1A022D > v_mov_b32_e32 v8, s46 ; 7E10022E > v_mov_b32_e32 v15, s47 ; 7E1E022F > s_and_b64 vcc, exec, vcc ; 87EA6A7E > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v50, v20, v20 ; 10642914 > v_mul_f32_e32 v46, v19, v20 ; 105C2913 > v_mul_f32_e32 v45, v19, v21 ; 105A2B13 > v_mac_f32_e32 v50, v21, v21 ; 3E642B15 > v_mul_f32_e32 v44, v19, v18 ; 10582513 > v_fma_f32 v47, v18, v21, -v46 ; D296002F 84BA2B12 > v_fma_f32 v48, v18, v20, v45 ; D2960030 04B62912 > v_mac_f32_e32 v46, v18, v21 ; 3E5C2B12 > v_fma_f32 v45, v18, v20, -v45 ; D296002D 84B62912 > v_mul_f32_e32 v18, v18, v18 ; 10242512 > v_mul_f32_e32 v56, v25, v25 ; 10703319 > v_fma_f32 v49, v20, v21, -v44 ; D2960031 84B22B14 > v_mad_f32 v51, v21, v21, v18 ; D2820033 044A2B15 > v_mac_f32_e32 v18, v20, v20 ; 3E242914 > v_fma_f32 v44, v20, v21, v44 ; D296002C 04B22B14 > v_mul_f32_e32 v52, v23, v25 ; 10683317 > s_waitcnt vmcnt(13) ; BF8C0F7D > v_mul_f32_e32 v19, v23, v22 ; 10262D17 > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mul_f32_e32 v20, v23, v26 ; 10283517 > v_mul_f32_e32 v23, v22, v22 ; 102E2D16 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v36, v36, v33 ; 10484324 > v_fma_f32 v53, v22, v26, -v52 ; D2960035 84D23516 > v_mac_f32_e32 v52, v22, v26 ; 3E683516 > v_fma_f32 v21, v22, v25, v20 ; D2960015 04523316 > v_fma_f32 v54, v22, v25, -v20 ; D2960036 84523316 > v_fma_f32 v20, v25, v26, -v19 ; D2960014 844E3519 > v_fma_f32 v55, v25, v26, v19 ; D2960037 044E3519 > v_mac_f32_e32 v56, v26, v26 ; 3E70351A > v_mad_f32 v26, v26, v26, v23 ; D282001A 045E351A > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v36, v28, v32 ; 3E48411C > v_fma_f32 v26, -v26, 
2.0, 1.0 ; D296001A 23C9E91A > v_mac_f32_e32 v23, v25, v25 ; 3E2E3319 > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v25, v38, v38 ; 10324D26 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_mad_f32 v28, v41, v41, v25 ; D282001C 04665329 > v_mac_f32_e32 v36, v37, v34 ; 3E484525 > v_fma_f32 v37, -v51, 2.0, 1.0 ; D2960025 23C9E933 > v_mul_f32_e32 v26, v26, v33 ; 1034431A > v_fma_f32 v18, -v18, 2.0, 1.0 ; D2960012 23C9E912 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mul_f32_e32 v22, v39, v41 ; 102C5327 > v_mul_f32_e32 v19, v39, v38 ; 10264D27 > v_mul_f32_e32 v39, v39, v40 ; 104E5127 > v_mac_f32_e32 v26, v37, v32 ; 3E344125 > v_fma_f32 v37, -v23, 2.0, 1.0 ; D2960025 23C9E917 > v_mul_f32_e32 v23, v18, v32 ; 102E4112 > v_fma_f32 v18, -v28, 2.0, 1.0 ; D2960012 23C9E91C > v_mac_f32_e32 v25, v40, v40 ; 3E325128 > v_mul_f32_e32 v61, v40, v40 ; 107A5128 > v_fma_f32 v28, -v56, 2.0, 1.0 ; D296001C 23C9E938 > v_fma_f32 v59, v38, v41, -v39 ; D296003B 849E5326 > v_mac_f32_e32 v26, v18, v34 ; 3E344512 > v_fma_f32 v18, -v25, 2.0, 1.0 ; D2960012 23C9E919 > v_mac_f32_e32 v23, v37, v33 ; 3E2E4325 > v_fma_f32 v60, v40, v41, v19 ; D296003C 044E5328 > v_fma_f32 v57, v38, v40, v22 ; D2960039 045A5126 > v_fma_f32 v58, v38, v40, -v22 ; D296003A 845A5126 > v_mac_f32_e32 v39, v38, v41 ; 3E4E5326 > v_fma_f32 v38, v40, v41, -v19 ; D2960026 844E5328 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mul_f32_e32 v40, v42, v32 ; 1050412A > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mac_f32_e32 v40, v43, v33 ; 3E50432B > v_mul_f32_e32 v19, v33, v20 ; 10262921 > v_mul_f32_e32 v22, v33, v21 ; 102C2B21 > v_mul_f32_e32 v24, v24, v33 ; 10304318 > v_mac_f32_e32 v61, v41, v41 ; 3E7A5329 > v_mac_f32_e32 v23, v18, v34 ; 3E2E4512 > v_fma_f32 v18, -v50, 2.0, 1.0 ; D2960012 23C9E932 > v_mul_f32_e32 v28, v28, v33 ; 1038431C > v_mac_f32_e32 v24, v17, v32 ; 3E304111 > v_mac_f32_e32 v19, v33, v20 ; 3E262921 > v_mul_f32_e32 v20, v32, v47 ; 10285F20 > v_mac_f32_e32 v22, v33, v21 ; 3E2C2B21 > v_mul_f32_e32 v21, v32, v44 ; 102A5920 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v40, v27, v34 ; 3E50451B > v_mul_f32_e32 v27, v33, v54 ; 10366D21 > v_mac_f32_e32 v28, v18, v32 ; 3E384112 > v_fma_f32 v17, -v61, 2.0, 1.0 ; D2960011 23C9E93D > v_mac_f32_e32 v21, v32, v44 ; 3E2A5920 > v_mac_f32_e32 v28, v17, v34 ; 3E384511 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v24, v35, v34 ; 3E304523 > v_mul_f32_e32 v17, v32, v49 ; 10226320 > v_mul_f32_e32 v18, v33, v55 ; 10246F21 > v_mul_f32_e32 v35, v32, v48 ; 10466120 > v_mac_f32_e32 v20, v32, v47 ; 3E285F20 > v_mul_f32_e32 v43, v32, v46 ; 10565D20 > v_mac_f32_e32 v27, v33, v54 ; 3E366D21 > v_mul_f32_e32 v32, v32, v45 ; 10405B20 > v_mul_f32_e32 v25, v33, v52 ; 10326921 > v_mul_f32_e32 v37, v34, v60 ; 104A7922 > v_mac_f32_e32 v21, 2.0, v18 ; 3E2A24F4 > v_mul_f32_e32 v44, v34, v58 ; 10587522 > v_mac_f32_e32 v27, 2.0, v32 ; 3E3640F4 > v_mac_f32_e32 v25, v33, v52 ; 3E326921 > v_mul_f32_e32 v33, v33, v53 ; 10426B21 > v_mac_f32_e32 v21, 2.0, v37 ; 3E2A4AF4 > v_mul_f32_e32 v41, v34, v57 ; 10527322 > v_mac_f32_e32 v22, 2.0, v35 ; 3E2C46F4 > v_mac_f32_e32 v27, 2.0, v44 ; 3E3658F4 > v_mac_f32_e32 v19, 2.0, v17 ; 3E2622F4 > v_mul_f32_e32 v38, v34, v38 ; 104C4D22 > v_mul_f32_e32 v42, v34, v59 ; 10547722 > v_mac_f32_e32 v20, 2.0, v33 ; 3E2842F4 > v_mul_f32_e32 v34, v34, v39 ; 10444F22 > v_mac_f32_e32 v25, 2.0, v43 ; 3E3256F4 > v_mac_f32_e32 v22, 2.0, v41 ; 3E2C52F4 > v_mul_f32_e32 v17, v30, v26 ; 1022351E > v_mul_f32_e32 v18, v30, v21 ; 10242B1E > v_mul_f32_e32 v30, v30, v27 ; 103C371E > v_mac_f32_e32 v20, 2.0, v42 ; 3E2854F4 > v_mac_f32_e32 v19, 2.0, v38 ; 
3E264CF4 > v_mac_f32_e32 v17, v29, v22 ; 3E222D1D > v_mac_f32_e32 v25, 2.0, v34 ; 3E3244F4 > v_mac_f32_e32 v30, v29, v28 ; 3E3C391D > v_mac_f32_e32 v17, v31, v19 ; 3E22271F > v_mac_f32_e32 v18, v29, v20 ; 3E24291D > v_mac_f32_e32 v30, v31, v25 ; 3E3C331F > v_mac_f32_e32 v18, v31, v23 ; 3E242F1F > v_add_f32_e32 v17, v36, v17 ; 06222324 > v_add_f32_e32 v24, v24, v30 ; 06303D18 > v_add_f32_e32 v18, v40, v18 ; 06242528 > v_add_f32_e32 v29, v24, v17 ; 063A2318 > v_add_f32_e32 v29, v18, v29 ; 063A3B12 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v16, v16 ; 7E205510 > v_mul_f32_e32 v16, v16, v29 ; 10203B10 > s_branch BB0_3 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v29 ; 7C023A80 > v_cndmask_b32_e64 v16, v29, 1.0, vcc ; D2000010 01A9E51D > v_cmp_le_f32_e32 vcc, 0, v16 ; 7C062080 > v_mul_f32_e32 v16, 0x70000000, v16 ; 102020FF 70000000 > v_bfrev_b32_e32 v29, 15 ; 7E3A708F > v_cndmask_b32_e32 v16, v29, v16 ; 0020211D > v_mul_f32_e32 v29, s37, v13 ; 103A1A25 > v_mul_f32_e32 v30, s35, v13 ; 103C1A23 > v_mul_f32_e32 v13, s36, v13 ; 101A1A24 > v_fma_f32 v29, s34, v12, v29 ; D296001D 04761822 > v_fma_f32 v30, v12, s32, v30 ; D296001E 0478410C > v_fma_f32 v12, v12, s33, v13 ; D296000C 0434430C > v_fma_f32 v29, s38, v8, v29 ; D296001D 04761026 > v_fma_f32 v13, v8, s4, v30 ; D296000D 04780908 > v_fma_f32 v8, v8, s31, v12 ; D2960008 04303F08 > v_mul_f32_e32 v12, v9, v22 ; 10182D09 > v_mac_f32_e32 v12, v10, v26 ; 3E18350A > v_mac_f32_e32 v12, v11, v19 ; 3E18270B > v_mul_f32_e32 v19, v9, v20 ; 10262909 > v_mul_f32_e32 v9, v9, v28 ; 10123909 > v_mac_f32_e32 v9, v10, v27 ; 3E12370A > v_mac_f32_e32 v9, v11, v25 ; 3E12330B > v_mac_f32_e32 v19, v10, v21 ; 3E262B0A > v_mul_f32_e32 v9, v9, v29 ; 10123B09 > v_mac_f32_e32 v19, v11, v23 ; 3E262F0B > v_mac_f32_e32 v9, v12, v13 ; 3E121B0C > v_mac_f32_e32 v9, v19, v8 ; 3E121113 > v_mul_f32_e32 v15, s30, v15 ; 101E1E1E > v_mad_f32 v3, v3, v9, v3 ; D2820003 040E1303 > v_mul_f32_e32 v9, 0x3e2aaaab, v15 ; 10121EFF 3E2AAAAB > v_subrev_f32_e32 v1, s23, v1 ; 0A020217 > v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 > v_subrev_f32_e32 v7, s25, v7 ; 0A0E0E19 > v_fma_f32 v1, v9, v1, s23 ; D2960001 005E0309 > v_fma_f32 v7, v9, v7, s25 ; D2960007 00660F09 > v_fma_f32 v4, -v4, s28, 1.0 ; D2960004 23C83904 > v_fma_f32 v1, s26, v1, v16 ; D2960001 0442021A > v_fma_f32 v5, -v5, s29, 1.0 ; D2960005 23C83B05 > v_fma_f32 v7, s26, v7, v16 ; D2960007 04420E1A > v_add_f32_e32 v1, v4, v1 ; 06020304 > v_add_f32_e32 v4, v5, v7 ; 06080F05 > v_mov_b32_e32 v5, 0x3e22f983 ; 7E0A02FF 3E22F983 > v_mul_f32_e32 v1, v5, v1 ; 10020305 > v_fract_f32_e32 v1, v1 ; 7E024101 > v_mul_f32_e32 v4, v5, v4 ; 10080905 > v_fract_f32_e32 v4, v4 ; 7E084104 > v_sin_f32_e32 v1, v1 ; 7E026B01 > v_subrev_f32_e32 v0, s22, v0 ; 0A000016 > v_sin_f32_e32 v4, v4 ; 7E086B04 > v_subrev_f32_e32 v6, s24, v6 ; 0A0C0C18 > v_fma_f32 v0, v9, v0, s22 ; D2960000 005A0109 > v_mul_f32_e32 v0, v0, v14 ; 10001D00 > v_fma_f32 v6, v9, v6, s24 ; D2960006 00620D09 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > v_mul_f32_e32 v5, v6, v14 ; 100A1D06 > v_mul_f32_e32 v1, v4, v5 ; 10020B04 > v_fma_f32 v0, v15, s21, v0 ; D2960000 04002B0F > v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 > v_fma_f32 v0, v4, v1, v0 ; D2960000 04020304 > v_mul_f32_e32 v3, s27, v3 ; 1006061B > v_mul_f32_e32 v3, 0.5, v3 ; 100606F0 > v_mul_f32_e32 v1, v0, v29 ; 10023B00 > v_mul_f32_e32 v4, v0, v13 ; 10081B00 > v_fma_f32 v1, v1, v3, v24 ; D2960001 04620701 > v_mul_f32_e32 v0, v0, v8 ; 10001100 > v_fma_f32 v4, v4, 
v3, v17 ; D2960004 04460704 > v_mul_f32_e32 v5, v1, v1 ; 100A0301 > v_fma_f32 v0, v0, v3, v18 ; D2960000 044A0700 > v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 > v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 > v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 > v_mul_f32_e32 v3, v24, v24 ; 10063118 > v_mac_f32_e32 v3, v17, v17 ; 3E062311 > v_mac_f32_e32 v3, v18, v18 ; 3E062512 > v_sqrt_f32_e32 v3, v3 ; 7E066703 > v_mul_f32_e32 v4, v5, v4 ; 10080905 > v_mul_f32_e32 v1, v5, v1 ; 10020305 > v_mul_f32_e32 v0, v5, v0 ; 10000105 > v_mul_f32_e32 v4, v4, v3 ; 10080704 > v_mul_f32_e32 v1, v1, v3 ; 10020701 > v_mul_f32_e32 v0, v0, v3 ; 10000700 > v_mul_f32_e32 v3, s20, v4 ; 10060814 > v_mac_f32_e32 v3, s17, v1 ; 3E060211 > v_mul_f32_e32 v5, s15, v4 ; 100A080F > v_mul_f32_e32 v6, s14, v4 ; 100C080E > v_mul_f32_e32 v4, s16, v4 ; 10080810 > v_mac_f32_e32 v3, s18, v0 ; 3E060012 > v_mac_f32_e32 v5, s8, v1 ; 3E0A0208 > v_mac_f32_e32 v6, s1, v1 ; 3E0C0201 > v_mac_f32_e32 v4, s11, v1 ; 3E08020B > v_add_f32_e32 v3, s19, v3 ; 06060613 > v_mac_f32_e32 v5, s9, v0 ; 3E0A0009 > v_mac_f32_e32 v6, s2, v0 ; 3E0C0002 > v_mac_f32_e32 v4, s12, v0 ; 3E08000C > v_mul_f32_e32 v3, s0, v3 ; 10060600 > v_add_f32_e32 v5, s10, v5 ; 060A0A0A > v_add_f32_e32 v6, s3, v6 ; 060C0C03 > v_add_f32_e32 v0, s13, v4 ; 0600080D > exp 15, 12, 0, 1, 0, v5, v6, v3, v0 ; F80008CF 00030605 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 64 >Code Size: 1816 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 > v_mov_b32_e32 v7, v4 ; 7E0E0304 > v_mov_b32_e32 v8, v4 ; 7E100304 > v_mov_b32_e32 v9, v4 ; 7E120304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[16:19], s[10:11], 0x8 ; C0880B08 > s_load_dwordx4 s[20:23], s[10:11], 0xc ; C08A0B0C > v_mov_b32_e32 v0, 0x437f0080 ; 7E0002FF 437F0080 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[32:35], v4, s[4:7], 0 idxen ; E00C2000 80012004 > s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C0820B10 > s_load_dwordx4 s[8:11], s[10:11], 0x14 ; C0840B14 > buffer_load_format_xyzw v[14:17], v5, s[12:15], 0 idxen ; E00C2000 80030E05 > buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 > buffer_load_format_xyzw v[10:13], v7, s[20:23], 0 idxen ; E00C2000 80050A07 > s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 > buffer_load_format_xyzw v[35:38], v8, s[4:7], 0 idxen ; E00C2000 80012308 > s_waitcnt vmcnt(2) ; BF8C0F72 > buffer_load_format_xyzw v[5:8], v9, s[8:11], 0 idxen ; E00C2000 80020509 > s_load_dwordx4 s[4:7], s[2:3], 0x8 ; C0820308 > s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s33, s[40:43], 0x98 ; C210A998 > s_buffer_load_dword s31, s[40:43], 0x99 ; C20FA999 > s_buffer_load_dword s32, s[40:43], 0x9a ; C210299A > s_buffer_load_dword s39, s[40:43], 0x53 ; C213A953 > s_buffer_load_dword s44, s[40:43], 0x80 ; C2162980 > 
s_buffer_load_dword s45, s[40:43], 0x81 ; C216A981 > s_buffer_load_dword s46, s[40:43], 0x82 ; C2172982 > s_buffer_load_dword s47, s[40:43], 0x83 ; C217A983 > s_buffer_load_dword s48, s[40:43], 0xb4 ; C21829B4 > s_buffer_load_dword s49, s[40:43], 0xb5 ; C218A9B5 > s_buffer_load_dword s50, s[40:43], 0xb6 ; C21929B6 > s_buffer_load_dword s51, s[40:43], 0xb7 ; C219A9B7 > s_buffer_load_dword s0, s[40:43], 0x4d ; C200294D > s_buffer_load_dword s18, s[40:43], 0x51 ; C2092951 > s_buffer_load_dword s26, s[40:43], 0x52 ; C20D2952 > s_buffer_load_dword s17, s[40:43], 0x65 ; C208A965 > s_buffer_load_dword s24, s[40:43], 0x78 ; C20C2978 > s_buffer_load_dword s25, s[40:43], 0x79 ; C20CA979 > s_buffer_load_dword s19, s[40:43], 0x7c ; C209A97C > s_buffer_load_dword s20, s[40:43], 0x7d ; C20A297D > s_buffer_load_dword s21, s[40:43], 0x7e ; C20AA97E > s_buffer_load_dword s22, s[40:43], 0x7f ; C20B297F > s_buffer_load_dword s23, s[40:43], 0x84 ; C20BA984 > s_buffer_load_dword s12, s[40:43], 0x89 ; C2062989 > s_buffer_load_dword s8, s[40:43], 0x8a ; C204298A > s_buffer_load_dword s1, s[40:43], 0x8b ; C200A98B > s_buffer_load_dword s11, s[40:43], 0x8d ; C205A98D > s_buffer_load_dword s2, s[40:43], 0x8f ; C201298F > s_buffer_load_dword s14, s[40:43], 0x90 ; C2072990 > s_buffer_load_dword s16, s[40:43], 0x91 ; C2082991 > s_buffer_load_dword s15, s[40:43], 0x92 ; C207A992 > s_buffer_load_dword s3, s[40:43], 0x93 ; C201A993 > s_buffer_load_dword s9, s[40:43], 0x94 ; C204A994 > s_buffer_load_dword s13, s[40:43], 0x95 ; C206A995 > s_buffer_load_dword s10, s[40:43], 0x96 ; C2052996 > s_buffer_load_dword s30, s[40:43], 0xa2 ; C20F29A2 > s_buffer_load_dword s27, s[40:43], 0xac ; C20DA9AC > s_buffer_load_dword s28, s[40:43], 0xad ; C20E29AD > s_buffer_load_dword s29, s[40:43], 0xae ; C20EA9AE > s_buffer_load_dword s37, s[40:43], 0x9c ; C212A99C > s_buffer_load_dword s35, s[40:43], 0x9d ; C211A99D > s_buffer_load_dword s36, s[40:43], 0x9e ; C212299E > s_buffer_load_dword s38, s[40:43], 0xa0 ; C21329A0 > s_buffer_load_dword s34, s[40:43], 0xa1 ; C21129A1 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, v0, v7 ; 10020F00 > v_cvt_i32_f32_e32 v1, v1 ; 7E021101 > v_mul_f32_e32 v6, v0, v6 ; 100C0D00 > v_mul_f32_e32 v0, v0, v5 ; 10000B00 > v_cvt_i32_f32_e32 v5, v6 ; 7E0A1106 > v_lshlrev_b32_e32 v1, 5, v1 ; 34020285 > v_cvt_i32_f32_e32 v0, v0 ; 7E001100 > v_or_b32_e32 v7, 16, v1 ; 380E0290 > v_or_b32_e32 v8, 28, v1 ; 3810029C > v_or_b32_e32 v9, 20, v1 ; 38120294 > v_or_b32_e32 v17, 24, v1 ; 38220298 > v_lshlrev_b32_e32 v5, 5, v5 ; 340A0A85 > buffer_load_dword v23, v9, s[4:7], 0 offen ; E0301000 80011709 > buffer_load_dword v21, v7, s[4:7], 0 offen ; E0301000 80011507 > buffer_load_dword v22, v8, s[4:7], 0 offen ; E0301000 80011608 > buffer_load_dword v24, v17, s[4:7], 0 offen ; E0301000 80011811 > v_or_b32_e32 v7, 16, v5 ; 380E0A90 > v_or_b32_e32 v8, 28, v5 ; 38100A9C > v_or_b32_e32 v9, 20, v5 ; 38120A94 > v_or_b32_e32 v17, 24, v5 ; 38220A98 > buffer_load_dword v42, v9, s[4:7], 0 offen ; E0301000 80012A09 > buffer_load_dword v31, v7, s[4:7], 0 offen ; E0301000 80011F07 > buffer_load_dword v40, v8, s[4:7], 0 offen ; E0301000 80012808 > buffer_load_dword v41, v17, s[4:7], 0 offen ; E0301000 80012911 > v_lshlrev_b32_e32 v0, 5, v0 ; 34000085 > v_or_b32_e32 v6, 4, v1 ; 380C0284 > v_or_b32_e32 v18, 8, v1 ; 38240288 > buffer_load_dword v20, v1, s[4:7], 0 offen ; E0301000 80011401 > v_or_b32_e32 v1, 4, v5 ; 38020A84 > v_or_b32_e32 v26, 28, v0 ; 3834009C > v_or_b32_e32 v28, 24, v0 ; 38380098 > buffer_load_dword v43, v1, s[4:7], 0 
offen ; E0301000 80012B01 > v_or_b32_e32 v25, 16, v0 ; 38320090 > v_or_b32_e32 v27, 20, v0 ; 38360094 > buffer_load_dword v30, v6, s[4:7], 0 offen ; E0301000 80011E06 > buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A > buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C > buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B > buffer_load_dword v25, v25, s[4:7], 0 offen ; E0301000 80011919 > buffer_load_dword v38, v5, s[4:7], 0 offen ; E0301000 80012605 > v_or_b32_e32 v19, 8, v5 ; 38260A88 > buffer_load_dword v45, v18, s[4:7], 0 offen ; E0301000 80012D12 > v_or_b32_e32 v29, 8, v0 ; 383A0088 > buffer_load_dword v46, v19, s[4:7], 0 offen ; E0301000 80012E13 > buffer_load_dword v39, v0, s[4:7], 0 offen ; E0301000 80012700 > buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D > v_or_b32_e32 v5, 4, v0 ; 380A0084 > buffer_load_dword v44, v5, s[4:7], 0 offen ; E0301000 80012C05 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e64 v9, s33, s33 ; D2100009 00004221 > v_mac_f32_e64 v9, s31, s31 ; D23E0009 00003E1F > s_buffer_load_dword s7, s[40:43], 0x88 ; C203A988 > s_buffer_load_dword s5, s[40:43], 0x8c ; C202A98C > s_buffer_load_dword s6, s[40:43], 0x8e ; C203298E > s_buffer_load_dword s4, s[40:43], 0x97 ; C2022997 > v_mac_f32_e64 v9, s32, s32 ; D23E0009 00004020 > v_sqrt_f32_e32 v9, v9 ; 7E126709 > v_mul_f32_e32 v19, s39, v9 ; 10261227 > v_cmp_eq_f32_e32 vcc, 0, v19 ; 7C042680 > v_mov_b32_e32 v0, s44 ; 7E00022C > v_mov_b32_e32 v1, s45 ; 7E02022D > v_mov_b32_e32 v5, s46 ; 7E0A022E > v_mov_b32_e32 v6, s47 ; 7E0C022F > v_mov_b32_e32 v8, s48 ; 7E100230 > v_mov_b32_e32 v17, s49 ; 7E220231 > v_mov_b32_e32 v7, s50 ; 7E0E0232 > v_mov_b32_e32 v18, s51 ; 7E240233 > s_and_b64 vcc, exec, vcc ; 87EA6A7E > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v52, v23, v23 ; 10682F17 > v_mul_f32_e32 v47, v22, v21 ; 105E2B16 > v_mul_f32_e32 v48, v22, v24 ; 10603116 > v_mul_f32_e32 v22, v22, v23 ; 102C2F16 > v_fma_f32 v49, v21, v24, -v22 ; D2960031 845A3115 > v_mac_f32_e32 v22, v21, v24 ; 3E2C3115 > v_fma_f32 v50, v21, v23, v48 ; D2960032 04C22F15 > v_fma_f32 v48, v21, v23, -v48 ; D2960030 84C22F15 > v_mul_f32_e32 v21, v21, v21 ; 102A2B15 > v_fma_f32 v51, v23, v24, -v47 ; D2960033 84BE3117 > v_mad_f32 v53, v24, v24, v21 ; D2820035 04563118 > v_mac_f32_e32 v21, v23, v23 ; 3E2A2F17 > v_fma_f32 v47, v23, v24, v47 ; D296002F 04BE3117 > v_mac_f32_e32 v52, v24, v24 ; 3E683118 > s_waitcnt vmcnt(13) ; BF8C0F7D > v_mul_f32_e32 v24, v40, v41 ; 10305328 > v_mul_f32_e32 v23, v40, v31 ; 102E3F28 > v_mul_f32_e32 v40, v40, v42 ; 10505528 > v_mul_f32_e32 v58, v42, v42 ; 1074552A > v_fma_f32 v54, v31, v41, -v40 ; D2960036 84A2531F > v_mac_f32_e32 v40, v31, v41 ; 3E50531F > v_fma_f32 v55, v31, v42, v24 ; D2960037 0462551F > v_fma_f32 v56, v31, v42, -v24 ; D2960038 8462551F > v_mul_f32_e32 v31, v31, v31 ; 103E3F1F > v_fma_f32 v24, v42, v41, -v23 ; D2960018 845E532A > v_fma_f32 v57, v42, v41, v23 ; D2960039 045E532A > v_mac_f32_e32 v58, v41, v41 ; 3E745329 > v_mad_f32 v41, v41, v41, v31 ; D2820029 047E5329 > v_mac_f32_e32 v31, v42, v42 ; 3E3E552A > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mul_f32_e32 v42, v43, v36 ; 1054492B > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mac_f32_e32 v42, v30, v35 ; 3E54471E > s_waitcnt vmcnt(8) ; BF8C0F78 > v_mul_f32_e32 v30, v26, v28 ; 103C391A > s_waitcnt vmcnt(6) ; BF8C0F76 > v_mul_f32_e32 v23, v26, v25 ; 102E331A > v_mul_f32_e32 v43, v26, v27 ; 1056371A > v_mul_f32_e32 v64, v27, v27 ; 1080371B > v_fma_f32 v59, v25, v27, v30 ; D296003B 047A3719 > v_fma_f32 v60, v25, v27, -v30 ; 
D296003C 847A3719 > v_mul_f32_e32 v30, v25, v25 ; 103C3319 > v_fma_f32 v61, v25, v28, -v43 ; D296003D 84AE3919 > v_mac_f32_e32 v43, v25, v28 ; 3E563919 > v_mad_f32 v65, v28, v28, v30 ; D2820041 047A391C > v_fma_f32 v62, v27, v28, -v23 ; D296003E 845E391B > v_fma_f32 v63, v27, v28, v23 ; D296003F 045E391B > v_mac_f32_e32 v64, v28, v28 ; 3E80391C > v_fma_f32 v28, -v41, 2.0, 1.0 ; D296001C 23C9E929 > v_mac_f32_e32 v30, v27, v27 ; 3E3C371B > v_fma_f32 v27, -v53, 2.0, 1.0 ; D296001B 23C9E935 > v_mul_f32_e32 v28, v28, v36 ; 1038491C > v_mac_f32_e32 v28, v27, v35 ; 3E38471B > v_fma_f32 v27, -v65, 2.0, 1.0 ; D296001B 23C9E941 > v_fma_f32 v21, -v21, 2.0, 1.0 ; D2960015 23C9E915 > v_mac_f32_e32 v28, v27, v37 ; 3E384B1B > v_mul_f32_e32 v27, v21, v35 ; 10364715 > v_fma_f32 v31, -v31, 2.0, 1.0 ; D296001F 23C9E91F > v_mac_f32_e32 v27, v31, v36 ; 3E36491F > v_fma_f32 v21, -v30, 2.0, 1.0 ; D2960015 23C9E91E > v_fma_f32 v31, -v58, 2.0, 1.0 ; D296001F 23C9E93A > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mul_f32_e32 v41, v45, v35 ; 1052472D > v_mul_f32_e32 v23, v36, v24 ; 102E3124 > v_mul_f32_e32 v38, v38, v36 ; 104C4926 > v_mac_f32_e32 v27, v21, v37 ; 3E364B15 > v_fma_f32 v21, -v52, 2.0, 1.0 ; D2960015 23C9E934 > v_mul_f32_e32 v31, v31, v36 ; 103E491F > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v41, v46, v36 ; 3E52492E > v_mac_f32_e32 v38, v20, v35 ; 3E4C4714 > v_mul_f32_e32 v25, v35, v47 ; 10325F23 > v_mul_f32_e32 v30, v36, v56 ; 103C7124 > v_mac_f32_e32 v23, v36, v24 ; 3E2E3124 > v_mul_f32_e32 v24, v35, v49 ; 10306323 > v_mac_f32_e32 v31, v21, v35 ; 3E3E4715 > v_fma_f32 v20, -v64, 2.0, 1.0 ; D2960014 23C9E940 > v_mul_f32_e32 v26, v36, v55 ; 10346F24 > v_mac_f32_e32 v25, v35, v47 ; 3E325F23 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v41, v29, v37 ; 3E524B1D > v_mul_f32_e32 v29, v36, v40 ; 103A5124 > v_mac_f32_e32 v31, v20, v37 ; 3E3E4B14 > v_mac_f32_e32 v38, v39, v37 ; 3E4C4B27 > v_mul_f32_e32 v20, v35, v51 ; 10286723 > v_mul_f32_e32 v21, v36, v57 ; 102A7324 > v_mac_f32_e32 v24, v35, v49 ; 3E306323 > v_mac_f32_e32 v30, v36, v56 ; 3E3C7124 > v_mul_f32_e32 v39, v35, v50 ; 104E6523 > v_mul_f32_e32 v22, v35, v22 ; 102C2D23 > v_mul_f32_e32 v35, v35, v48 ; 10466123 > v_mac_f32_e32 v29, v36, v40 ; 3E3A5124 > v_mac_f32_e32 v26, v36, v55 ; 3E346F24 > v_mul_f32_e32 v36, v36, v54 ; 10486D24 > v_mul_f32_e32 v40, v37, v63 ; 10507F25 > v_mac_f32_e32 v25, 2.0, v21 ; 3E322AF4 > v_mul_f32_e32 v47, v37, v60 ; 105E7925 > v_mac_f32_e32 v30, 2.0, v35 ; 3E3C46F4 > v_mul_f32_e32 v45, v37, v59 ; 105A7725 > v_mac_f32_e32 v26, 2.0, v39 ; 3E344EF4 > v_mul_f32_e32 v46, v37, v61 ; 105C7B25 > v_mac_f32_e32 v24, 2.0, v36 ; 3E3048F4 > v_mac_f32_e32 v25, 2.0, v40 ; 3E3250F4 > v_mac_f32_e32 v30, 2.0, v47 ; 3E3C5EF4 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v42, v44, v37 ; 3E544B2C > v_mul_f32_e32 v44, v37, v62 ; 10587D25 > v_mac_f32_e32 v23, 2.0, v20 ; 3E2E28F4 > v_mac_f32_e32 v29, 2.0, v22 ; 3E3A2CF4 > v_mul_f32_e32 v37, v37, v43 ; 104A5725 > v_mac_f32_e32 v26, 2.0, v45 ; 3E345AF4 > v_mul_f32_e32 v20, v33, v28 ; 10283921 > v_mul_f32_e32 v22, v33, v25 ; 102C3321 > v_mul_f32_e32 v33, v33, v30 ; 10423D21 > v_mac_f32_e32 v24, 2.0, v46 ; 3E305CF4 > v_mac_f32_e32 v22, v32, v24 ; 3E2C3120 > v_mac_f32_e32 v23, 2.0, v44 ; 3E2E58F4 > v_mac_f32_e32 v20, v32, v26 ; 3E283520 > v_mac_f32_e32 v29, 2.0, v37 ; 3E3A4AF4 > v_mac_f32_e32 v33, v32, v31 ; 3E423F20 > v_mac_f32_e32 v20, v34, v23 ; 3E282F22 > v_mac_f32_e32 v22, v34, v27 ; 3E2C3722 > v_mac_f32_e32 v33, v34, v29 ; 3E423B22 > v_add_f32_e32 v21, v42, v20 ; 062A292A > v_add_f32_e32 v20, v41, 
v22 ; 06282D29 > v_add_f32_e32 v22, v38, v33 ; 062C4326 > v_add_f32_e32 v32, v22, v21 ; 06402B16 > v_add_f32_e32 v32, v20, v32 ; 06404114 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_mov_b64 vcc, vcc ; BEEA046A > s_cbranch_vccnz BB0_2 ; BF870000 > v_rcp_f32_e32 v19, v19 ; 7E265513 > v_mul_f32_e32 v19, v19, v32 ; 10264113 > s_branch BB0_3 ; BF820000 > v_cmp_lt_f32_e32 vcc, 0, v32 ; 7C024080 > v_cndmask_b32_e64 v19, v32, 1.0, vcc ; D2000013 01A9E520 > v_cmp_le_f32_e32 vcc, 0, v19 ; 7C062680 > v_mul_f32_e32 v19, 0x70000000, v19 ; 102626FF 70000000 > v_bfrev_b32_e32 v32, 15 ; 7E40708F > v_cndmask_b32_e32 v19, v32, v19 ; 00262720 > v_mul_f32_e32 v26, v14, v26 ; 1034350E > v_mac_f32_e32 v26, v15, v28 ; 3E34390F > v_mac_f32_e32 v26, v16, v23 ; 3E342F10 > v_mul_f32_e32 v23, v14, v24 ; 102E310E > v_mul_f32_e32 v14, v14, v31 ; 101C3F0E > v_mul_f32_e32 v18, s26, v18 ; 1024241A > v_mac_f32_e32 v23, v15, v25 ; 3E2E330F > v_mac_f32_e32 v14, v15, v30 ; 3E1C3D0F > v_mul_f32_e32 v15, 0x3e2aaaab, v18 ; 101E24FF 3E2AAAAB > v_mul_f32_e32 v32, s37, v17 ; 10402225 > v_mul_f32_e32 v33, s35, v17 ; 10422223 > v_mul_f32_e32 v17, s36, v17 ; 10222224 > v_subrev_f32_e32 v0, s19, v0 ; 0A000013 > v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 > v_subrev_f32_e32 v1, s20, v1 ; 0A020214 > v_subrev_f32_e32 v5, s21, v5 ; 0A0A0A15 > v_subrev_f32_e32 v6, s22, v6 ; 0A0C0C16 > v_fma_f32 v32, s33, v8, v32 ; D2960020 04821021 > v_fma_f32 v0, v15, v0, s19 ; D2960000 004E010F > v_fma_f32 v1, v15, v1, s20 ; D2960001 0052030F > v_fma_f32 v5, v15, v5, s21 ; D2960005 00560B0F > v_fma_f32 v15, v15, v6, s22 ; D296000F 005A0D0F > v_fma_f32 v33, v8, s31, v33 ; D2960021 04843F08 > v_fma_f32 v8, v8, s32, v17 ; D2960008 04444108 > v_fma_f32 v17, v7, s34, v33 ; D2960011 04844507 > v_fma_f32 v32, s38, v7, v32 ; D2960020 04820E26 > v_fma_f32 v7, v7, s30, v8 ; D2960007 04203D07 > v_fma_f32 v8, -v11, s24, 1.0 ; D2960008 23C8310B > v_fma_f32 v1, s23, v1, v19 ; D2960001 044E0217 > v_fma_f32 v33, -v12, s25, 1.0 ; D2960021 23C8330C > v_fma_f32 v15, s23, v15, v19 ; D296000F 044E1E17 > v_add_f32_e32 v1, v8, v1 ; 06020308 > v_add_f32_e32 v8, v33, v15 ; 06101F21 > v_mov_b32_e32 v15, 0x3e22f983 ; 7E1E02FF 3E22F983 > v_mul_f32_e32 v1, v15, v1 ; 1002030F > v_mul_f32_e32 v8, v15, v8 ; 1010110F > v_fract_f32_e32 v1, v1 ; 7E024101 > v_fract_f32_e32 v8, v8 ; 7E104108 > v_sin_f32_e32 v1, v1 ; 7E026B01 > v_sin_f32_e32 v8, v8 ; 7E106B08 > v_mul_f32_e32 v0, v0, v9 ; 10001300 > v_mul_f32_e32 v5, v5, v9 ; 100A1305 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > v_mul_f32_e32 v1, v8, v5 ; 10020B08 > v_mac_f32_e32 v14, v16, v29 ; 3E1C3B10 > v_fma_f32 v0, v18, s18, v0 ; D2960000 04002512 > v_mov_b32_e32 v5, 0x3e800000 ; 7E0A02FF 3E800000 > v_fma_f32 v0, v5, v1, v0 ; D2960000 04020305 > v_mul_f32_e32 v1, v14, v32 ; 1002410E > v_mac_f32_e32 v23, v16, v27 ; 3E2E3710 > v_mac_f32_e32 v1, v26, v17 ; 3E02231A > v_mac_f32_e32 v1, v23, v7 ; 3E020F17 > v_mad_f32 v1, v10, v1, v10 ; D2820001 042A030A > v_mul_f32_e32 v1, s17, v1 ; 10020211 > v_mul_f32_e32 v1, 0.5, v1 ; 100202F0 > v_mul_f32_e32 v5, v0, v32 ; 100A4100 > v_mul_f32_e32 v8, v0, v17 ; 10102300 > v_mul_f32_e32 v0, v0, v7 ; 10000F00 > v_fma_f32 v5, v5, v1, v22 ; D2960005 045A0305 > v_fma_f32 v7, v8, v1, v21 ; D2960007 04560308 > v_fma_f32 v0, v0, v1, v20 ; D2960000 04520300 > v_mul_f32_e32 v1, v5, v5 ; 10020B05 > v_mac_f32_e32 v1, v7, v7 ; 3E020F07 > v_mac_f32_e32 v1, v0, v0 ; 3E020100 > v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 > v_mov_b32_e32 v8, 1.0 ; 7E1002F2 > v_sub_f32_e32 v34, s27, v22 ; 08442C1B > v_sub_f32_e32 v35, s28, v21 ; 
08462A1C > v_mul_f32_e32 v5, v5, v1 ; 100A0305 > v_mul_f32_e32 v7, v7, v1 ; 100E0307 > v_mul_f32_e32 v0, v0, v1 ; 10000300 > v_mul_f32_e32 v1, v22, v22 ; 10022D16 > v_mac_f32_e32 v1, v21, v21 ; 3E022B15 > v_mac_f32_e32 v1, v20, v20 ; 3E022914 > v_sqrt_f32_e32 v1, v1 ; 7E026701 > v_mul_f32_e32 v7, v7, v1 ; 100E0307 > v_mul_f32_e32 v0, v0, v1 ; 10000300 > v_mul_f32_e32 v5, v5, v1 ; 100A0305 > v_mul_f32_e32 v1, s16, v7 ; 10020E10 > v_mac_f32_e32 v1, s14, v5 ; 3E020A0E > v_mul_f32_e32 v9, s12, v7 ; 10120E0C > v_mul_f32_e32 v15, s11, v7 ; 101E0E0B > v_mul_f32_e32 v7, s13, v7 ; 100E0E0D > exp 15, 32, 0, 0, 0, v14, v26, v23, v0 ; F800020F 00171A0E > v_mac_f32_e32 v9, s7, v5 ; 3E120A07 > v_mac_f32_e32 v15, s5, v5 ; 3E1E0A05 > v_mac_f32_e32 v7, s9, v5 ; 3E0E0A09 > v_sub_f32_e32 v36, s29, v20 ; 0848281D > exp 15, 33, 0, 0, 0, v3, v4, v0, v8 ; F800021F 08000403 > v_mac_f32_e32 v1, s15, v0 ; 3E02000F > v_mac_f32_e32 v15, s6, v0 ; 3E1E0006 > v_mac_f32_e32 v7, s10, v0 ; 3E0E000A > v_mac_f32_e32 v9, s8, v0 ; 3E120008 > s_waitcnt expcnt(0) ; BF8C0F0F > v_add_f32_e32 v0, s3, v1 ; 06000203 > exp 15, 34, 0, 0, 0, v34, v35, v36, v6 ; F800022F 06242322 > exp 15, 35, 0, 0, 0, v10, v11, v12, v13 ; F800023F 0D0C0B0A > v_mul_f32_e32 v0, s0, v0 ; 10000000 > v_add_f32_e32 v1, s1, v9 ; 06021201 > v_add_f32_e32 v3, s2, v15 ; 06061E02 > v_add_f32_e32 v4, s4, v7 ; 06080E04 > exp 15, 12, 0, 1, 0, v1, v3, v0, v4 ; F80008CF 04000301 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 68 >Code Size: 1888 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 3 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v6, v2, 1, 1, [m0] ; C8180502 > v_interp_p2_f32 v6, [v6], v3, 1, 1, [m0] ; C8190503 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v8, v2, 1, 2, [m0] ; C8200902 > v_interp_p2_f32 v8, [v8], v3, 1, 2, [m0] ; C8210903 > v_interp_p1_f32 v9, v2, 2, 2, [m0] ; C8240A02 > v_interp_p2_f32 v9, [v9], v3, 2, 2, [m0] ; C8250A03 > v_interp_p1_f32 v2, v2, 3, 3, [m0] ; C8080F02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s20, s20, s19 ; 87141314 > v_interp_p2_f32 v2, [v2], v3, 3, 3, [m0] ; C8090F03 > image_sample v[14:17], v[5:6], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30E05 > s_load_dwordx8 s[24:31], s[4:5], 0x10 ; C0CC0510 > s_load_dwordx4 s[56:59], s[4:5], 0x1c ; C09C051C > s_buffer_load_dword s6, s[0:3], 0x3d ; C203013D > s_buffer_load_dword s7, s[0:3], 0x3e ; C203813E > s_buffer_load_dword s8, s[0:3], 0x3f ; C204013F > s_buffer_load_dword s9, s[0:3], 0x40 ; C2048140 > s_buffer_load_dword s11, s[0:3], 0x41 ; C2058141 > s_buffer_load_dword s20, s[0:3], 0x42 ; C20A0142 > s_buffer_load_dword s21, s[0:3], 0x44 ; C20A8144 > s_buffer_load_dword s22, s[0:3], 0x45 ; C20B0145 > s_buffer_load_dword s23, s[0:3], 0x46 ; C20B8146 > 
s_buffer_load_dword s32, s[0:3], 0x48 ; C2100148 > s_buffer_load_dword s33, s[0:3], 0x49 ; C2108149 > s_buffer_load_dword s34, s[0:3], 0x4a ; C211014A > s_buffer_load_dword s35, s[0:3], 0x4c ; C211814C > s_buffer_load_dword s36, s[0:3], 0x4e ; C212014E > s_buffer_load_dword s37, s[0:3], 0x4f ; C212814F > s_buffer_load_dword s38, s[0:3], 0x54 ; C2130154 > s_buffer_load_dword s39, s[0:3], 0x55 ; C2138155 > s_buffer_load_dword s40, s[0:3], 0x57 ; C2140157 > s_buffer_load_dword s41, s[0:3], 0x58 ; C2148158 > s_buffer_load_dword s42, s[0:3], 0x59 ; C2150159 > s_buffer_load_dword s43, s[0:3], 0x5a ; C215815A > s_buffer_load_dword s44, s[0:3], 0x5b ; C216015B > s_buffer_load_dword s45, s[0:3], 0x5c ; C216815C > s_buffer_load_dword s46, s[0:3], 0x5d ; C217015D > s_buffer_load_dword s47, s[0:3], 0x5f ; C217815F > s_buffer_load_dword s48, s[0:3], 0x60 ; C2180160 > s_buffer_load_dword s49, s[0:3], 0x64 ; C2188164 > s_buffer_load_dword s50, s[0:3], 0x84 ; C2190184 > s_buffer_load_dword s51, s[0:3], 0xa8 ; C21981A8 > s_buffer_load_dword s52, s[0:3], 0xa9 ; C21A01A9 > s_buffer_load_dword s53, s[0:3], 0xaa ; C21A81AA > s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C > s_load_dwordx8 s[12:19], s[4:5], 0x20 ; C0C60520 > v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v3, v17, v3 ; 06060711 > v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 > v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s56, s56, s31 ; 87381F38 > s_and_b32 s0, s0, s19 ; 87001300 > v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 > v_mul_f32_e32 v18, v0, v0 ; 10240100 > v_mul_f32_e32 v19, v7, v7 ; 10260F07 > v_mac_f32_e32 v18, v1, v1 ; 3E240301 > v_mac_f32_e32 v19, v8, v8 ; 3E261108 > v_mac_f32_e32 v18, v4, v4 ; 3E240904 > v_mac_f32_e32 v19, v9, v9 ; 3E261309 > v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 > v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 > v_mov_b32_e32 v3, s50 ; 7E060232 > v_mad_f32 v11, s48, v3, v6 ; D282000B 041A0630 > v_mul_f32_e32 v0, v0, v18 ; 10002500 > v_mul_f32_e32 v7, v7, v19 ; 100E2707 > v_mul_f32_e32 v0, v7, v0 ; 10000107 > v_mul_f32_e32 v1, v1, v18 ; 10022501 > v_mul_f32_e32 v7, v8, v19 ; 100E2708 > v_mac_f32_e32 v0, v7, v1 ; 3E000307 > v_mul_f32_e32 v1, v4, v18 ; 10022504 > v_mul_f32_e32 v4, v9, v19 ; 10082709 > v_mac_f32_e32 v0, v4, v1 ; 3E000304 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_mul_f32_e32 v0, 0x3f8b851f, v0 ; 100000FF 3F8B851F > v_log_f32_e32 v0, v0 ; 7E004F00 > v_mad_f32 v10, s47, v3, v5 ; D282000A 0416062F > v_mac_f32_e32 v5, s45, v3 ; 3E0A062D > v_mac_f32_e32 v6, s46, v3 ; 3E0C062E > v_mul_f32_e32 v3, 0x3e99999a, v14 ; 10061CFF 3E99999A > v_mac_f32_e32 v3, 0x3f170a3d, v15 ; 3E061EFF 3F170A3D > v_mov_b32_e32 v20, 0x3fa00000 ; 7E2802FF 3FA00000 > v_mac_f32_e32 v3, 0x3de147ae, v16 ; 3E0620FF 3DE147AE > v_mul_f32_e32 v20, s49, v20 ; 10282831 > v_subrev_f32_e32 v12, v14, v3 ; 0A18070E > v_mov_b32_e32 v7, s8 ; 7E0E0208 > v_mul_f32_e32 v0, v20, v0 ; 10000114 > v_subrev_f32_e32 v17, v15, v3 ; 0A22070F > v_subrev_f32_e32 v3, v16, v3 ; 0A060710 > v_fma_f32 v3, s36, v3, v16 ; D2960003 04420624 > v_sub_f32_e32 v7, s20, v7 ; 080E0E14 > v_exp_f32_e32 v0, v0 ; 7E004B00 > image_sample v[10:11], v[10:11], s[24:31], s[56:59] dmask:0xa ; F0800A00 01C60A0A > v_mul_f32_e32 v3, s53, v3 ; 10060635 > v_fma_f32 v7, v0, v7, s8 ; D2960007 00220F00 > v_fma_f32 v12, s36, v12, v14 ; D296000C 043A1824 > v_fma_f32 v14, s36, v17, v15 ; D296000E 043E2224 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v8, v11, 2.0, -1.0 ; D2960008 03CDE90B > 
v_mul_f32_e32 v17, s38, v3 ; 10220626 > v_mul_f32_e32 v3, v7, v3 ; 10060707 > v_fma_f32 v7, v10, 2.0, -1.0 ; D2960007 03CDE90A > v_mov_b32_e32 v1, s6 ; 7E020206 > v_mov_b32_e32 v4, s7 ; 7E080207 > v_mul_f32_e32 v7, s37, v7 ; 100E0E25 > v_mul_f32_e32 v8, s37, v8 ; 10101025 > v_sub_f32_e32 v1, s9, v1 ; 08020209 > v_sub_f32_e32 v4, s11, v4 ; 0808080B > v_fma_f32 v10, s44, v6, v8 ; D296000A 04220C2C > v_fma_f32 v9, s43, v5, v7 ; D2960009 041E0A2B > image_sample v5, v[9:10], s[12:19], s[0:3] dmask:0x2 ; F0800200 00030509 > v_mul_f32_e32 v12, s51, v12 ; 10181833 > v_fma_f32 v1, v0, v1, s6 ; D2960001 001A0300 > v_mul_f32_e32 v14, s52, v14 ; 101C1C34 > v_fma_f32 v4, v0, v4, s7 ; D2960004 001E0900 > v_mul_f32_e32 v1, v1, v12 ; 10021901 > v_mul_f32_e32 v4, v4, v14 ; 10081D04 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 > v_mul_f32_e32 v1, s35, v1 ; 10020223 > v_fma_f32 v5, -v5, s42, 1.0 ; D2960005 23C85505 > v_mul_f32_e32 v4, s35, v4 ; 10080823 > v_mul_f32_e32 v3, s35, v3 ; 10060623 > v_mov_b32_e32 v6, s33 ; 7E0C0221 > v_mov_b32_e32 v7, s34 ; 7E0E0222 > v_mul_f32_e32 v1, v1, v5 ; 10020B01 > v_mul_f32_e32 v4, v4, v5 ; 10080B04 > v_mul_f32_e32 v3, v3, v5 ; 10060B03 > v_mov_b32_e32 v5, s32 ; 7E0A0220 > v_rcp_f32_e32 v8, s41 ; 7E105429 > v_sub_f32_e32 v5, s21, v5 ; 080A0A15 > v_sub_f32_e32 v6, s22, v6 ; 080C0C16 > v_sub_f32_e32 v7, s23, v7 ; 080E0E17 > v_fma_f32 v5, v0, v5, s32 ; D2960005 00820B00 > v_fma_f32 v6, v0, v6, s33 ; D2960006 00860D00 > v_fma_f32 v0, v0, v7, s34 ; D2960000 008A0F00 > v_add_f32_e64 v7, 1.0, s41 ; D2060007 000052F2 > v_fma_f32 v2, v7, s39, v2 ; D2960002 04084F07 > v_cmp_neq_f32_e64 vcc, 0, s41 ; D01A006A 00005280 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_cndmask_b32_e32 v7, v7, v8 ; 000E1107 > v_mad_f32 v2, v2, v7, -v7 ; D2820002 841E0F02 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_mov_b32_e32 v7, 0x40400000 ; 7E0E02FF 40400000 > v_fma_f32 v7, -2.0, v2, v7 ; D2960007 041E04F5 > v_mul_f32_e32 v2, v2, v2 ; 10040502 > v_mul_f32_e32 v2, v7, v2 ; 10040507 > v_log_f32_e32 v2, v2 ; 7E044F02 > v_fma_f32 v7, v17, v0, -v3 ; D2960007 840E0111 > v_mul_f32_e32 v15, s38, v12 ; 101E1826 > v_mul_f32_e32 v16, s38, v14 ; 10201C26 > v_mul_f32_e32 v0, s40, v2 ; 10000428 > v_exp_f32_e32 v0, v0 ; 7E004B00 > v_fma_f32 v5, v15, v5, -v1 ; D2960005 84060B0F > v_min_f32_e32 v2, 1.0, v0 ; 1E0400F2 > v_fma_f32 v6, v16, v6, -v4 ; D2960006 84120D10 > v_fma_f32 v0, v2, v5, v1 ; D2960000 04060B02 > v_fma_f32 v1, v2, v6, v4 ; D2960001 04120D02 > v_fma_f32 v2, v2, v7, v3 ; D2960002 040E0F02 > v_mov_b32_e32 v3, 0xbc23d70a ; 7E0602FF BC23D70A > v_add_f32_e32 v3, v0, v3 ; 06060700 > v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 > v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680 > v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 > v_mov_b32_e32 v3, v0 ; 7E060300 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 64 >VGPRS: 24 >Code Size: 912 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 8 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v1, 0xbfc00000 ; 7E0202FF BFC00000 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > 
v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v6, 0x3fc00000 ; 7E0C02FF 3FC00000 > v_bfrev_b32_e32 v5, 15 ; 7E0A708F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v8, s4 ; 7E105404 > v_rcp_f32_e32 v9, s0 ; 7E125400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_mul_f32_e32 v10, v1, v8 ; 10141101 > v_mul_f32_e32 v1, v1, v9 ; 10021301 > v_mul_f32_e32 v11, -0.5, v8 ; 101610F1 > v_mul_f32_e32 v12, 0.5, v8 ; 101810F0 > v_mul_f32_e32 v8, v6, v8 ; 10101106 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v13, -0.5, v9 ; 101A12F1 > v_mul_f32_e32 v14, 0.5, v9 ; 101C12F0 > v_cndmask_b32_e32 v9, v10, v5 ; 00120B0A > v_cndmask_b32_e32 v10, v11, v5 ; 00140B0B > v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; D2000001 00020B01 > v_cndmask_b32_e32 v11, v12, v7 ; 00160F0C > v_cndmask_b32_e64 v5, v13, v5, s[0:1] ; D2000005 00020B0D > v_cndmask_b32_e32 v8, v8, v7 ; 00100F08 > v_cndmask_b32_e64 v12, v14, v7, s[0:1] ; D200000C 00020F0E > v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06 > v_fma_f32 v7, v3, 0.5, 0.5 ; D2960007 03C1E103 > v_fma_f32 v13, v4, -0.5, 0.5 ; D296000D 03C1E304 > v_add_f32_e32 v9, v7, v9 ; 06121307 > v_add_f32_e32 v1, v13, v1 ; 0602030D > v_add_f32_e32 v10, v7, v10 ; 06141507 > v_add_f32_e32 v11, v7, v11 ; 06161707 > v_add_f32_e32 v7, v7, v8 ; 060E1107 > exp 15, 32, 0, 0, 0, v9, v1, v10, v1 ; F800020F 010A0109 > exp 15, 33, 0, 0, 0, v11, v1, v7, v1 ; F800021F 0107010B > v_add_f32_e32 v5, v13, v5 ; 060A0B0D > exp 15, 34, 0, 0, 0, v9, v5, v10, v5 ; F800022F 050A0509 > v_add_f32_e32 v8, v13, v12 ; 0610190D > exp 15, 35, 0, 0, 0, v11, v5, v7, v5 ; F800023F 0507050B > exp 15, 36, 0, 0, 0, v9, v8, v10, v8 ; F800024F 080A0809 > v_add_f32_e32 v6, v13, v6 ; 060C0D0D > exp 15, 37, 0, 0, 0, v11, v8, v7, v8 ; F800025F 0807080B > exp 15, 38, 0, 0, 0, v9, v6, v10, v6 ; F800026F 060A0609 > exp 15, 39, 0, 0, 0, v11, v6, v7, v6 ; F800027F 0607060B > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 308 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v5, v2, 0, 0, [m0] ; C8140002 > v_interp_p2_f32 v5, [v5], v3, 0, 0, [m0] ; C8150003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > v_interp_p1_f32 v7, v2, 2, 0, [m0] ; C81C0202 > v_interp_p2_f32 v7, [v7], v3, 2, 0, [m0] ; C81D0203 > v_interp_p1_f32 v8, v2, 3, 0, [m0] ; C8200302 > v_interp_p2_f32 v8, [v8], v3, 3, 0, [m0] ; C8210303 > v_interp_p1_f32 v9, v2, 0, 1, [m0] ; C8240402 > v_interp_p2_f32 v9, [v9], v3, 0, 1, [m0] ; C8250403 > v_interp_p1_f32 v10, v2, 1, 1, [m0] ; C8280502 > v_interp_p2_f32 v10, [v10], v3, 1, 1, [m0] ; C8290503 > v_interp_p1_f32 v11, v2, 2, 1, [m0] ; C82C0602 > v_interp_p2_f32 v11, [v11], v3, 2, 1, [m0] ; C82D0603 > v_interp_p1_f32 v12, v2, 3, 1, [m0] ; C8300702 > v_interp_p2_f32 v12, [v12], v3, 3, 1, [m0] ; C8310703 > v_interp_p1_f32 v14, v2, 0, 2, [m0] ; C8380802 > v_interp_p2_f32 v14, [v14], v3, 0, 2, 
[m0] ; C8390803 > v_interp_p1_f32 v15, v2, 1, 2, [m0] ; C83C0902 > v_interp_p2_f32 v15, [v15], v3, 1, 2, [m0] ; C83D0903 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > v_interp_p1_f32 v22, v2, 2, 4, [m0] ; C8581202 > v_interp_p2_f32 v22, [v22], v3, 2, 4, [m0] ; C8591203 > v_interp_p1_f32 v23, v2, 3, 4, [m0] ; C85C1302 > v_interp_p2_f32 v23, [v23], v3, 3, 4, [m0] ; C85D1303 > v_interp_p1_f32 v24, v2, 0, 5, [m0] ; C8601402 > v_interp_p2_f32 v24, [v24], v3, 0, 5, [m0] ; C8611403 > v_interp_p1_f32 v25, v2, 1, 5, [m0] ; C8641502 > v_interp_p2_f32 v25, [v25], v3, 1, 5, [m0] ; C8651503 > v_interp_p1_f32 v26, v2, 2, 5, [m0] ; C8681602 > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; C08C0304 > v_interp_p2_f32 v26, [v26], v3, 2, 5, [m0] ; C8691603 > v_interp_p1_f32 v27, v2, 3, 5, [m0] ; C86C1702 > v_interp_p2_f32 v27, [v27], v3, 3, 5, [m0] ; C86D1703 > v_interp_p1_f32 v28, v2, 0, 6, [m0] ; C8701802 > v_interp_p2_f32 v28, [v28], v3, 0, 6, [m0] ; C8711803 > v_interp_p1_f32 v29, v2, 1, 6, [m0] ; C8741902 > v_interp_p2_f32 v29, [v29], v3, 1, 6, [m0] ; C8751903 > v_interp_p1_f32 v30, v2, 2, 6, [m0] ; C8781A02 > s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > v_interp_p2_f32 v30, [v30], v3, 2, 6, [m0] ; C8791A03 > v_interp_p1_f32 v31, v2, 3, 6, [m0] ; C87C1B02 > v_interp_p2_f32 v31, [v31], v3, 3, 6, [m0] ; C87D1B03 > v_interp_p1_f32 v32, v2, 0, 7, [m0] ; C8801C02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s2, s[24:27], 0x1e ; C201191E > v_interp_p2_f32 v32, [v32], v3, 0, 7, [m0] ; C8811C03 > v_interp_p1_f32 v33, v2, 1, 7, [m0] ; C8841D02 > v_interp_p2_f32 v33, [v33], v3, 1, 7, [m0] ; C8851D03 > v_interp_p1_f32 v34, v2, 2, 7, [m0] ; C8881E02 > v_interp_p2_f32 v34, [v34], v3, 2, 7, [m0] ; C8891E03 > v_interp_p1_f32 v35, v2, 3, 7, [m0] ; C88C1F02 > s_and_b32 s12, s12, s23 ; 870C170C > v_interp_p2_f32 v35, [v35], v3, 3, 7, [m0] ; C88D1F03 > image_sample v[36:39], v[7:8], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642407 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_subrev_f32_e32 v2, s2, v39 ; 0A044E02 > image_sample v[39:42], v[5:6], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642705 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v5, s2, v42 ; 0A0A5402 > v_mul_f32_e32 v3, v36, v2 ; 10060524 > v_mul_f32_e32 v7, v37, v2 ; 100E0525 > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > v_mul_f32_e32 v2, v38, v2 ; 10040526 > v_fma_f32 v6, v40, v5, v7 ; D2960006 041E0B28 > v_fma_f32 v3, v39, v5, v3 ; D2960003 040E0B27 > v_fma_f32 v2, v41, v5, v2 ; D2960002 040A0B29 > v_add_f32_e32 v5, v39, v36 ; 060A4927 > v_add_f32_e32 v7, v40, v37 ; 060E4B28 > v_add_f32_e32 v8, v41, v38 ; 06104D29 > image_sample v[36:39], v[9:10], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642409 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v9, s2, v39 ; 0A124E02 > v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 > v_fma_f32 v3, v36, v9, v3 ; D2960003 
040E1324 > v_fma_f32 v6, v37, v9, v6 ; D2960006 041A1325 > v_fma_f32 v2, v38, v9, v2 ; D2960002 040A1326 > image_sample v[9:12], v[11:12], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064090B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_add_f32_e32 v5, v36, v5 ; 060A0B24 > v_add_f32_e32 v7, v37, v7 ; 060E0F25 > v_add_f32_e32 v8, v38, v8 ; 06101126 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[14:15], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064090E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_mul_f32_e32 v14, v9, v12 ; 101C1909 > v_mul_f32_e32 v15, v10, v12 ; 101E190A > v_mul_f32_e32 v12, v11, v12 ; 1018190B > v_fma_f32 v5, v9, 2.0, v5 ; D2960005 0415E909 > v_fma_f32 v7, v10, 2.0, v7 ; D2960007 041DE90A > v_fma_f32 v8, v11, 2.0, v8 ; D2960008 0421E90B > v_fma_f32 v2, v12, 2.0, v2 ; D2960002 0409E90C > image_sample v[9:12], v[16:17], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640910 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v14, 2.0, v3 ; D2960003 040DE90E > v_fma_f32 v6, v15, 2.0, v6 ; D2960006 0419E90F > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[18:19], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640912 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[20:21], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640914 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[22:23], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640916 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[24:25], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640918 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, 
v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[26:27], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064091A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[28:29], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064091C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[30:31], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064091E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[32:33], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640920 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > s_buffer_load_dword s0, s[24:27], 0x1c ; C200191C > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > image_sample v[9:12], v[34:35], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640922 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v12, s2, v12 ; 0A181802 > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_mov_b32_e32 v0, 0x3d800000 ; 7E0002FF 3D800000 > v_fma_f32 v3, v9, v12, v3 ; D2960003 040E1909 > s_buffer_load_dword s1, s[24:27], 0x1d ; C200991D > v_fma_f32 v6, v10, v12, v6 ; D2960006 041A190A > v_add_f32_e32 v5, v9, v5 ; 060A0B09 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v8, v11, v8 ; 0610110B > v_fma_f32 v2, v11, v12, v2 ; D2960002 040A190B > v_mul_f32_e32 v3, v0, v3 ; 10060700 > v_mul_f32_e32 v6, v0, v6 ; 100C0D00 > v_mul_f32_e32 v5, v0, v5 ; 100A0B00 > v_mul_f32_e32 v7, v0, v7 ; 100E0F00 > v_mul_f32_e32 v8, v0, v8 ; 10101100 > v_mul_f32_e32 v0, v0, v2 ; 10000500 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s0, v3 ; 10040600 > v_mul_f32_e32 v3, 0x3e99999a, v3 ; 100606FF 3E99999A > v_mac_f32_e32 v3, 0x3f170a3d, v6 ; 3E060CFF 3F170A3D > v_mul_f32_e32 v9, s0, v6 ; 10120C00 > v_mul_f32_e32 v10, s0, v0 ; 10140000 > v_mac_f32_e32 v3, 0x3de147ae, v0 ; 3E0600FF 3DE147AE > v_max3_f32 v0, v10, v9, v2 ; D2A80000 040A130A > v_mul_f32_e32 v0, s1, v0 ; 10000001 > v_mul_f32_e32 v0, 0x437f0000, v0 ; 100000FF 437F0000 > v_ceil_f32_e32 v0, v0 ; 7E004500 > v_max_f32_e32 v0, 1.0, v0 ; 200000F2 > v_cmp_lt_f32_e32 vcc, 0, v3 ; 7C020680 > v_mul_f32_e32 v3, 0x3b808081, v0 ; 100600FF 3B808081 > v_mul_f32_e32 v0, s0, v3 ; 
10000600 > v_cmp_eq_f32_e64 s[0:1], 0, v0 ; D0040000 00020080 > v_rcp_f32_e32 v0, v0 ; 7E005500 > v_cmp_lt_f32_e64 s[2:3], 0, v2 ; D0020002 00020480 > v_cmp_lt_f32_e64 s[4:5], 0, v9 ; D0020004 00021280 > v_cmp_lt_f32_e64 s[6:7], 0, v10 ; D0020006 00021480 > v_cndmask_b32_e64 v6, v2, 1.0, s[2:3] ; D2000006 0009E502 > v_cndmask_b32_e64 v11, v9, 1.0, s[4:5] ; D200000B 0011E509 > v_cndmask_b32_e64 v12, v10, 1.0, s[6:7] ; D200000C 0019E50A > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_cmp_le_f32_e64 s[2:3], 0, v6 ; D0060002 00020C80 > v_cmp_le_f32_e64 s[4:5], 0, v11 ; D0060004 00021680 > v_bfrev_b32_e32 v4, 15 ; 7E08708F > v_mul_f32_e32 v6, v1, v6 ; 100C0D01 > v_mul_f32_e32 v11, v1, v11 ; 10161701 > v_cmp_le_f32_e64 s[6:7], 0, v12 ; D0060006 00021880 > v_mul_f32_e32 v1, v1, v12 ; 10021901 > v_cndmask_b32_e64 v6, v4, v6, s[2:3] ; D2000006 000A0D04 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > v_cndmask_b32_e64 v11, v4, v11, s[4:5] ; D200000B 00121704 > v_cndmask_b32_e64 v1, v4, v1, s[6:7] ; D2000001 001A0304 > v_mul_f32_e32 v4, v0, v9 ; 10081300 > v_mul_f32_e32 v0, v0, v10 ; 10001500 > v_cndmask_b32_e64 v2, v2, v6, s[0:1] ; D2000002 00020D02 > v_cndmask_b32_e64 v6, v0, v1, s[0:1] ; D2000006 00020300 > v_cndmask_b32_e64 v4, v4, v11, s[0:1] ; D2000004 00021704 > v_cndmask_b32_e32 v0, v5, v2 ; 00000505 > v_cndmask_b32_e32 v1, v7, v4 ; 00020907 > v_cndmask_b32_e32 v2, v8, v6 ; 00040D08 > v_cndmask_b32_e32 v3, 0, v3 ; 00060680 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 44 >Code Size: 1496 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 5 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v1, 0xbfc00000 ; 7E0202FF BFC00000 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v6, 0x3fc00000 ; 7E0C02FF 3FC00000 > v_bfrev_b32_e32 v5, 15 ; 7E0A708F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v8, s4 ; 7E105404 > v_rcp_f32_e32 v9, s0 ; 7E125400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_mul_f32_e32 v10, v1, v8 ; 10141101 > v_mul_f32_e32 v1, v1, v9 ; 10021301 > v_mul_f32_e32 v11, -0.5, v8 ; 101610F1 > v_mul_f32_e32 v12, 0.5, v8 ; 101810F0 > v_mul_f32_e32 v8, v6, v8 ; 10101106 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v13, -0.5, v9 ; 101A12F1 > v_mul_f32_e32 v14, 0.5, v9 ; 101C12F0 > v_cndmask_b32_e32 v9, v10, v5 ; 00120B0A > v_cndmask_b32_e32 v10, v11, v5 ; 00140B0B > v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; D2000001 00020B01 > v_cndmask_b32_e32 v11, v12, v7 ; 00160F0C > v_cndmask_b32_e64 v5, v13, v5, s[0:1] ; D2000005 00020B0D > v_cndmask_b32_e32 v8, v8, v7 ; 00100F08 > v_cndmask_b32_e64 v12, v14, v7, s[0:1] ; D200000C 00020F0E > v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06 > v_fma_f32 v7, v3, 0.5, 0.5 ; D2960007 03C1E103 > v_fma_f32 v13, v4, -0.5, 0.5 ; D296000D 03C1E304 
> v_add_f32_e32 v9, v7, v9 ; 06121307 > v_add_f32_e32 v1, v13, v1 ; 0602030D > v_add_f32_e32 v10, v7, v10 ; 06141507 > v_add_f32_e32 v11, v7, v11 ; 06161707 > v_add_f32_e32 v7, v7, v8 ; 060E1107 > exp 15, 32, 0, 0, 0, v9, v1, v10, v1 ; F800020F 010A0109 > exp 15, 33, 0, 0, 0, v11, v1, v7, v1 ; F800021F 0107010B > v_add_f32_e32 v5, v13, v5 ; 060A0B0D > exp 15, 34, 0, 0, 0, v9, v5, v10, v5 ; F800022F 050A0509 > v_add_f32_e32 v8, v13, v12 ; 0610190D > exp 15, 35, 0, 0, 0, v11, v5, v7, v5 ; F800023F 0507050B > exp 15, 36, 0, 0, 0, v9, v8, v10, v8 ; F800024F 080A0809 > v_add_f32_e32 v6, v13, v6 ; 060C0D0D > exp 15, 37, 0, 0, 0, v11, v8, v7, v8 ; F800025F 0807080B > exp 15, 38, 0, 0, 0, v9, v6, v10, v6 ; F800026F 060A0609 > exp 15, 39, 0, 0, 0, v11, v6, v7, v6 ; F800027F 0607060B > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 16 >Code Size: 308 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v5, v2, 0, 0, [m0] ; C8140002 > v_interp_p2_f32 v5, [v5], v3, 0, 0, [m0] ; C8150003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > v_interp_p1_f32 v7, v2, 2, 0, [m0] ; C81C0202 > v_interp_p2_f32 v7, [v7], v3, 2, 0, [m0] ; C81D0203 > v_interp_p1_f32 v8, v2, 3, 0, [m0] ; C8200302 > v_interp_p2_f32 v8, [v8], v3, 3, 0, [m0] ; C8210303 > v_interp_p1_f32 v9, v2, 0, 1, [m0] ; C8240402 > v_interp_p2_f32 v9, [v9], v3, 0, 1, [m0] ; C8250403 > v_interp_p1_f32 v10, v2, 1, 1, [m0] ; C8280502 > v_interp_p2_f32 v10, [v10], v3, 1, 1, [m0] ; C8290503 > v_interp_p1_f32 v11, v2, 2, 1, [m0] ; C82C0602 > v_interp_p2_f32 v11, [v11], v3, 2, 1, [m0] ; C82D0603 > v_interp_p1_f32 v12, v2, 3, 1, [m0] ; C8300702 > v_interp_p2_f32 v12, [v12], v3, 3, 1, [m0] ; C8310703 > v_interp_p1_f32 v14, v2, 0, 2, [m0] ; C8380802 > v_interp_p2_f32 v14, [v14], v3, 0, 2, [m0] ; C8390803 > v_interp_p1_f32 v15, v2, 1, 2, [m0] ; C83C0902 > v_interp_p2_f32 v15, [v15], v3, 1, 2, [m0] ; C83D0903 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > v_interp_p1_f32 v22, v2, 2, 4, [m0] ; C8581202 > v_interp_p2_f32 v22, [v22], v3, 2, 4, [m0] ; C8591203 > v_interp_p1_f32 v23, v2, 3, 4, [m0] ; C85C1302 > v_interp_p2_f32 v23, [v23], v3, 3, 4, [m0] ; C85D1303 > v_interp_p1_f32 v24, v2, 0, 5, [m0] ; C8601402 > v_interp_p2_f32 v24, [v24], v3, 0, 5, [m0] ; C8611403 > v_interp_p1_f32 v25, v2, 1, 5, [m0] ; C8641502 > v_interp_p2_f32 v25, [v25], v3, 1, 5, [m0] ; C8651503 > v_interp_p1_f32 v26, v2, 2, 5, [m0] ; C8681602 > v_interp_p2_f32 v26, [v26], v3, 2, 5, [m0] ; C8691603 > v_interp_p1_f32 v27, v2, 3, 5, [m0] ; C86C1702 > s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 > 
v_interp_p2_f32 v27, [v27], v3, 3, 5, [m0] ; C86D1703 > v_interp_p1_f32 v28, v2, 0, 6, [m0] ; C8701802 > v_interp_p2_f32 v28, [v28], v3, 0, 6, [m0] ; C8711803 > v_interp_p1_f32 v29, v2, 1, 6, [m0] ; C8741902 > v_interp_p2_f32 v29, [v29], v3, 1, 6, [m0] ; C8751903 > v_interp_p1_f32 v30, v2, 2, 6, [m0] ; C8781A02 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > v_interp_p2_f32 v30, [v30], v3, 2, 6, [m0] ; C8791A03 > v_interp_p1_f32 v31, v2, 3, 6, [m0] ; C87C1B02 > v_interp_p2_f32 v31, [v31], v3, 3, 6, [m0] ; C87D1B03 > v_interp_p1_f32 v32, v2, 0, 7, [m0] ; C8801C02 > v_interp_p2_f32 v32, [v32], v3, 0, 7, [m0] ; C8811C03 > v_interp_p1_f32 v33, v2, 1, 7, [m0] ; C8841D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[20:23], 0x1c ; C202151C > v_interp_p2_f32 v33, [v33], v3, 1, 7, [m0] ; C8851D03 > v_interp_p1_f32 v34, v2, 2, 7, [m0] ; C8881E02 > v_interp_p2_f32 v34, [v34], v3, 2, 7, [m0] ; C8891E03 > v_interp_p1_f32 v35, v2, 3, 7, [m0] ; C88C1F02 > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v35, [v35], v3, 3, 7, [m0] ; C88D1F03 > image_sample v[36:39], v[7:8], s[12:19], s[0:3] dmask:0xf ; F0800F00 00032407 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v2, v36, v39 ; 10044F24 > v_mul_f32_e32 v3, v37, v39 ; 10064F25 > v_mul_f32_e32 v7, v38, v39 ; 100E4F26 > image_sample v[36:39], v[5:6], s[12:19], s[0:3] dmask:0xf ; F0800F00 00032405 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s4, v2 ; 10040404 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, v36, v39 ; 100A4F24 > v_fma_f32 v2, v5, s4, v2 ; D2960002 04080905 > v_mul_f32_e32 v3, s4, v3 ; 10060604 > v_mul_f32_e32 v5, v37, v39 ; 100A4F25 > v_mul_f32_e32 v7, s4, v7 ; 100E0E04 > v_mul_f32_e32 v6, v38, v39 ; 100C4F26 > v_fma_f32 v3, v5, s4, v3 ; D2960003 040C0905 > v_fma_f32 v5, v6, s4, v7 ; D2960005 041C0906 > image_sample v[6:9], v[9:10], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030609 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[11:12], s[12:19], s[0:3] dmask:0xf ; F0800F00 0003060B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[14:15], s[12:19], s[0:3] dmask:0xf ; F0800F00 0003060E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v6, s4, v6 ; 100C0C04 > v_fma_f32 v2, v6, 2.0, v2 ; D2960002 0409E906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v6, s4, v6 ; 100C0C04 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, 2.0, v3 ; D2960003 040DE906 > v_mul_f32_e32 v6, s4, v7 ; 100C0E04 > v_fma_f32 v5, v6, 2.0, v5 ; D2960005 0415E906 > image_sample v[6:9], v[16:17], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030610 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[18:19], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030612 > s_waitcnt vmcnt(0) ; 
BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[20:21], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030614 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[22:23], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030616 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_mul_f32_e32 v6, v7, v9 ; 100C1307 > v_mul_f32_e32 v7, v8, v9 ; 100E1308 > v_fma_f32 v3, v6, s4, v3 ; D2960003 040C0906 > v_fma_f32 v5, v7, s4, v5 ; D2960005 04140907 > image_sample v[6:9], v[24:25], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030618 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > image_sample v[6:9], v[26:27], s[12:19], s[0:3] dmask:0xf ; F0800F00 0003061A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > image_sample v[6:9], v[28:29], s[12:19], s[0:3] dmask:0xf ; F0800F00 0003061C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > image_sample v[6:9], v[30:31], s[12:19], s[0:3] dmask:0xf ; F0800F00 0003061E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > image_sample v[6:9], v[32:33], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030620 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > s_buffer_load_dword s5, s[20:23], 0x1d ; C202951D > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > image_sample v[6:9], v[34:35], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030622 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v6, v9 ; 100C1306 > v_mul_f32_e32 v7, v7, v9 ; 100E1307 > v_mul_f32_e32 v8, v8, v9 ; 10101308 > v_mov_b32_e32 v0, 0x3d800000 ; 7E0002FF 3D800000 > v_fma_f32 v2, v6, s4, v2 ; D2960002 04080906 > v_fma_f32 v3, v7, s4, v3 ; D2960003 040C0907 > v_fma_f32 v5, v8, s4, v5 ; D2960005 04140908 > v_mul_f32_e32 v2, v0, v2 ; 10040500 > v_mul_f32_e32 v6, v0, v3 ; 100C0700 > v_mul_f32_e32 v0, v0, v5 ; 10000B00 > v_max3_f32 v3, v0, v6, v2 ; D2A80003 040A0D00 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s5, v3 ; 10060605 > v_mul_f32_e32 
v3, 0x437f0000, v3 ; 100606FF 437F0000 > v_ceil_f32_e32 v3, v3 ; 7E064503 > v_max_f32_e32 v3, 1.0, v3 ; 200606F2 > v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 > v_mul_f32_e32 v5, s4, v3 ; 100A0604 > v_cmp_eq_f32_e32 vcc, 0, v5 ; 7C040A80 > v_rcp_f32_e32 v5, v5 ; 7E0A5505 > v_cmp_lt_f32_e64 s[0:1], 0, v2 ; D0020000 00020480 > v_cmp_lt_f32_e64 s[2:3], 0, v6 ; D0020002 00020C80 > v_cmp_lt_f32_e64 s[4:5], 0, v0 ; D0020004 00020080 > v_cndmask_b32_e64 v7, v2, 1.0, s[0:1] ; D2000007 0001E502 > v_cndmask_b32_e64 v8, v6, 1.0, s[2:3] ; D2000008 0009E506 > v_cndmask_b32_e64 v9, v0, 1.0, s[4:5] ; D2000009 0011E500 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_cmp_le_f32_e64 s[0:1], 0, v7 ; D0060000 00020E80 > v_cmp_le_f32_e64 s[2:3], 0, v8 ; D0060002 00021080 > v_bfrev_b32_e32 v4, 15 ; 7E08708F > v_mul_f32_e32 v7, v1, v7 ; 100E0F01 > v_mul_f32_e32 v8, v1, v8 ; 10101101 > v_cmp_le_f32_e64 s[4:5], 0, v9 ; D0060004 00021280 > v_mul_f32_e32 v1, v1, v9 ; 10021301 > v_cndmask_b32_e64 v7, v4, v7, s[0:1] ; D2000007 00020F04 > v_cndmask_b32_e64 v8, v4, v8, s[2:3] ; D2000008 000A1104 > v_cndmask_b32_e64 v4, v4, v1, s[4:5] ; D2000004 00120304 > v_mul_f32_e32 v1, v5, v2 ; 10020505 > v_mul_f32_e32 v2, v5, v6 ; 10040D05 > v_mul_f32_e32 v5, v5, v0 ; 100A0105 > v_cndmask_b32_e32 v0, v1, v7 ; 00000F01 > v_cndmask_b32_e32 v1, v2, v8 ; 00021102 > v_cndmask_b32_e32 v2, v5, v4 ; 00040905 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 40 >Code Size: 1232 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 6 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v1, 0xc0e00000 ; 7E0202FF C0E00000 > v_mov_b32_e32 v7, 0xc0a00000 ; 7E0E02FF C0A00000 > v_mov_b32_e32 v8, 0xc0400000 ; 7E1002FF C0400000 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v5, 0xc0c00000 ; 7E0A02FF C0C00000 > v_bfrev_b32_e32 v9, 14 ; 7E12708E > v_bfrev_b32_e32 v6, 15 ; 7E0C708F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v10, s4 ; 7E145404 > v_rcp_f32_e32 v11, s0 ; 7E165400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_mul_f32_e32 v1, v10, v1 ; 1002030A > v_mul_f32_e32 v11, 0, v11 ; 10161680 > v_mul_f32_e32 v5, v10, v5 ; 100A0B0A > v_mul_f32_e32 v7, v10, v7 ; 100E0F0A > v_mul_f32_e32 v12, -4.0, v10 ; 101814F7 > v_mul_f32_e32 v8, v10, v8 ; 1010110A > v_mul_f32_e32 v13, -2.0, v10 ; 101A14F5 > v_xor_b32_e32 v14, 0x80000000, v10 ; 3A1C14FF 80000000 > v_cndmask_b32_e32 v15, v10, v9 ; 001E130A > v_add_f32_e32 v16, v10, v10 ; 0620150A > v_mul_f32_e32 v17, 0x40400000, v10 ; 102214FF 40400000 > v_mul_f32_e32 v18, 4.0, v10 ; 102414F6 > v_mul_f32_e32 v19, 0x40a00000, v10 ; 102614FF 40A00000 > v_mul_f32_e32 v20, 0x40c00000, v10 ; 102814FF 40C00000 > v_mul_f32_e32 v10, 0x40e00000, v10 ; 101414FF 40E00000 > v_cndmask_b32_e64 v11, v11, 0, s[0:1] ; D200000B 0001010B > v_cndmask_b32_e32 v1, v1, v6 ; 00020D01 > 
v_cndmask_b32_e32 v5, v5, v6 ; 000A0D05 > v_cndmask_b32_e32 v7, v7, v6 ; 000E0D07 > v_cndmask_b32_e32 v12, v12, v6 ; 00180D0C > v_cndmask_b32_e32 v8, v8, v6 ; 00100D08 > v_cndmask_b32_e32 v13, v13, v6 ; 001A0D0D > v_cndmask_b32_e32 v6, v14, v6 ; 000C0D0E > v_cndmask_b32_e32 v14, v16, v9 ; 001C1310 > v_cndmask_b32_e32 v16, v17, v9 ; 00201311 > v_cndmask_b32_e32 v17, v18, v9 ; 00221312 > v_cndmask_b32_e32 v18, v19, v9 ; 00241313 > v_cndmask_b32_e32 v19, v20, v9 ; 00261314 > v_cndmask_b32_e32 v9, v10, v9 ; 0012130A > v_mov_b32_e32 v0, 0 ; 7E000280 > v_fma_f32 v10, v3, 0.5, 0.5 ; D296000A 03C1E103 > v_fma_f32 v20, v4, -0.5, 0.5 ; D2960014 03C1E304 > v_add_f32_e32 v1, v10, v1 ; 0602030A > v_add_f32_e32 v11, v20, v11 ; 06161714 > v_add_f32_e32 v5, v10, v5 ; 060A0B0A > exp 15, 32, 0, 0, 0, v1, v11, v5, v11 ; F800020F 0B050B01 > v_add_f32_e32 v7, v10, v7 ; 060E0F0A > v_add_f32_e32 v12, v10, v12 ; 0618190A > v_add_f32_e32 v8, v10, v8 ; 0610110A > v_add_f32_e32 v13, v10, v13 ; 061A1B0A > exp 15, 33, 0, 0, 0, v7, v11, v12, v11 ; F800021F 0B0C0B07 > v_add_f32_e32 v6, v10, v6 ; 060C0D0A > exp 15, 34, 0, 0, 0, v8, v11, v13, v11 ; F800022F 0B0D0B08 > v_add_f32_e32 v15, v10, v15 ; 061E1F0A > v_add_f32_e32 v14, v10, v14 ; 061C1D0A > exp 15, 35, 0, 0, 0, v6, v11, v10, v20 ; F800023F 140A0B06 > v_add_f32_e32 v16, v10, v16 ; 0620210A > v_add_f32_e32 v17, v10, v17 ; 0622230A > exp 15, 36, 0, 0, 0, v15, v11, v14, v11 ; F800024F 0B0E0B0F > v_add_f32_e32 v18, v10, v18 ; 0624250A > v_add_f32_e32 v19, v10, v19 ; 0626270A > exp 15, 37, 0, 0, 0, v16, v11, v17, v11 ; F800025F 0B110B10 > v_add_f32_e32 v9, v10, v9 ; 0612130A > exp 15, 38, 0, 0, 0, v18, v11, v19, v11 ; F800026F 0B130B12 > exp 15, 39, 0, 0, 0, v9, v11, v0, v0 ; F800027F 00000B09 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 24 >Code Size: 412 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 2, 1, [m0] ; C8200602 > v_interp_p2_f32 v8, [v8], v3, 2, 1, [m0] ; C8210603 > v_interp_p1_f32 v9, v2, 3, 1, [m0] ; C8240702 > v_interp_p2_f32 v9, [v9], v3, 3, 1, [m0] ; C8250703 > v_interp_p1_f32 v10, v2, 0, 2, [m0] ; C8280802 > v_interp_p2_f32 v10, [v10], v3, 0, 2, [m0] ; C8290803 > v_interp_p1_f32 v11, v2, 1, 2, [m0] ; C82C0902 > v_interp_p2_f32 v11, [v11], v3, 1, 2, [m0] ; C82D0903 > v_interp_p1_f32 v14, v2, 2, 2, [m0] ; C8380A02 > v_interp_p2_f32 v14, [v14], v3, 2, 2, [m0] ; C8390A03 > v_interp_p1_f32 v15, v2, 3, 2, [m0] ; C83C0B02 > v_interp_p2_f32 v15, [v15], v3, 3, 2, [m0] ; C83D0B03 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > 
v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > v_interp_p1_f32 v22, v2, 2, 4, [m0] ; C8581202 > v_interp_p2_f32 v22, [v22], v3, 2, 4, [m0] ; C8591203 > v_interp_p1_f32 v23, v2, 3, 4, [m0] ; C85C1302 > v_interp_p2_f32 v23, [v23], v3, 3, 4, [m0] ; C85D1303 > v_interp_p1_f32 v24, v2, 0, 5, [m0] ; C8601402 > v_interp_p2_f32 v24, [v24], v3, 0, 5, [m0] ; C8611403 > v_interp_p1_f32 v25, v2, 1, 5, [m0] ; C8641502 > v_interp_p2_f32 v25, [v25], v3, 1, 5, [m0] ; C8651503 > v_interp_p1_f32 v26, v2, 2, 5, [m0] ; C8681602 > v_interp_p2_f32 v26, [v26], v3, 2, 5, [m0] ; C8691603 > v_interp_p1_f32 v27, v2, 3, 5, [m0] ; C86C1702 > v_interp_p2_f32 v27, [v27], v3, 3, 5, [m0] ; C86D1703 > v_interp_p1_f32 v28, v2, 0, 6, [m0] ; C8701802 > s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > v_interp_p2_f32 v28, [v28], v3, 0, 6, [m0] ; C8711803 > v_interp_p1_f32 v29, v2, 1, 6, [m0] ; C8741902 > v_interp_p2_f32 v29, [v29], v3, 1, 6, [m0] ; C8751903 > v_interp_p1_f32 v30, v2, 2, 6, [m0] ; C8781A02 > v_interp_p2_f32 v30, [v30], v3, 2, 6, [m0] ; C8791A03 > v_interp_p1_f32 v31, v2, 3, 6, [m0] ; C87C1B02 > v_interp_p2_f32 v31, [v31], v3, 3, 6, [m0] ; C87D1B03 > v_interp_p1_f32 v32, v2, 0, 7, [m0] ; C8801C02 > v_interp_p2_f32 v32, [v32], v3, 0, 7, [m0] ; C8811C03 > v_interp_p1_f32 v33, v2, 1, 7, [m0] ; C8841D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s23 ; 870C170C > v_interp_p2_f32 v33, [v33], v3, 1, 7, [m0] ; C8851D03 > image_sample v[2:5], v[4:5], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640204 > image_sample v[34:37], v[0:1], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642200 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v2, v2, v5 ; 10040B02 > v_mul_f32_e32 v3, v3, v5 ; 10060B03 > v_mul_f32_e32 v4, v4, v5 ; 10080B04 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v34, v37 ; 10004B22 > v_mul_f32_e32 v1, v35, v37 ; 10024B23 > v_mul_f32_e32 v5, v36, v37 ; 100A4B24 > image_sample v[34:37], v[6:7], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642206 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v34, v37 ; 100C4B22 > v_mul_f32_e32 v7, v35, v37 ; 100E4B23 > v_mul_f32_e32 v12, v36, v37 ; 10184B24 > image_sample v[34:37], v[8:9], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642208 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, v34, v37 ; 10104B22 > v_mul_f32_e32 v9, v35, v37 ; 10124B23 > v_mul_f32_e32 v34, v36, v37 ; 10444B24 > image_sample v[35:38], v[10:11], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064230A > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v10, v35, v38 ; 10144D23 > v_mul_f32_e32 v11, v36, v38 ; 10164D24 > v_mul_f32_e32 v35, v37, v38 ; 10464D25 > image_sample v[36:39], v[14:15], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064240E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v14, v36, v39 ; 101C4F24 > v_mul_f32_e32 v15, v37, v39 ; 101E4F25 > v_mul_f32_e32 v36, v38, v39 ; 10484F26 > image_sample v[37:40], v[16:17], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642510 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v16, v37, 
v40 ; 10205125 > v_mul_f32_e32 v17, v38, v40 ; 10225126 > v_mul_f32_e32 v37, v39, v40 ; 104A5127 > image_sample v[38:41], v[18:19], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642612 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v18, v38, v41 ; 10245326 > v_mul_f32_e32 v19, v39, v41 ; 10265327 > v_mul_f32_e32 v38, v40, v41 ; 104C5328 > image_sample v[39:42], v[20:21], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642714 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v20, v39, v42 ; 10285527 > v_mul_f32_e32 v21, v40, v42 ; 102A5528 > v_mul_f32_e32 v39, v41, v42 ; 104E5529 > image_sample v[40:43], v[22:23], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642816 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v22, v40, v43 ; 102C5728 > v_mul_f32_e32 v23, v41, v43 ; 102E5729 > v_mul_f32_e32 v40, v42, v43 ; 1050572A > image_sample v[41:44], v[24:25], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642918 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v24, v41, v44 ; 10305929 > v_mul_f32_e32 v25, v42, v44 ; 1032592A > v_mul_f32_e32 v41, v43, v44 ; 1052592B > image_sample v[42:45], v[26:27], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642A1A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v26, v42, v45 ; 10345B2A > v_mul_f32_e32 v27, v43, v45 ; 10365B2B > v_mul_f32_e32 v42, v44, v45 ; 10545B2C > image_sample v[43:46], v[28:29], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642B1C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v28, v43, v46 ; 10385D2B > v_mul_f32_e32 v29, v44, v46 ; 103A5D2C > v_mul_f32_e32 v43, v45, v46 ; 10565D2D > image_sample v[44:47], v[30:31], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642C1E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v30, v44, v47 ; 103C5F2C > v_mul_f32_e32 v31, v45, v47 ; 103E5F2D > v_mul_f32_e32 v44, v46, v47 ; 10585F2E > image_sample v[45:48], v[32:33], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642D20 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v33, v46, v48 ; 1042612E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s4, v2 ; 10040404 > v_mov_b32_e32 v46, 0x3cddd257 ; 7E5C02FF 3CDDD257 > v_mul_f32_e32 v32, v45, v48 ; 1040612D > v_mul_f32_e32 v45, v47, v48 ; 105A612F > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_mul_f32_e32 v0, s4, v0 ; 10000004 > v_mov_b32_e32 v47, 0x3c827c43 ; 7E5E02FF 3C827C43 > v_fma_f32 v0, v0, v47, v2 ; D2960000 040A5F00 > v_mul_f32_e32 v2, s4, v3 ; 10040604 > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_mul_f32_e32 v1, s4, v1 ; 10020204 > v_fma_f32 v1, v1, v47, v2 ; D2960001 040A5F01 > v_mul_f32_e32 v2, s4, v4 ; 10040804 > v_mul_f32_e32 v3, s4, v5 ; 10060A04 > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_fma_f32 v2, v3, v47, v2 ; D2960002 040A5F03 > v_mul_f32_e32 v3, s4, v6 ; 10060C04 > v_mov_b32_e32 v4, 0x3d2dc3f0 ; 7E0802FF 3D2DC3F0 > v_fma_f32 v0, v3, v4, v0 ; D2960000 04020903 > v_mul_f32_e32 v3, s4, v7 ; 10060E04 > v_fma_f32 v1, v3, v4, v1 ; D2960001 04060903 > v_mul_f32_e32 v3, s4, v12 ; 10061804 > v_fma_f32 v2, v3, v4, v2 ; D2960002 040A0903 > v_mul_f32_e32 v3, s4, v8 ; 10061004 > v_mov_b32_e32 v5, 0x3d7ae64e ; 7E0A02FF 3D7AE64E > v_fma_f32 v0, v3, v5, v0 ; D2960000 04020B03 > v_mul_f32_e32 v3, s4, v9 ; 10061204 > v_fma_f32 v1, v3, v5, v1 ; D2960001 04060B03 > v_mul_f32_e32 v3, s4, v34 ; 10064404 > v_fma_f32 v2, v3, v5, v2 ; D2960002 040A0B03 > v_mul_f32_e32 v3, s4, v10 ; 10061404 > v_mov_b32_e32 v6, 0x3da6f006 ; 7E0C02FF 3DA6F006 > v_fma_f32 v0, v3, v6, v0 ; D2960000 04020D03 > v_mul_f32_e32 v3, s4, v11 ; 10061604 > v_fma_f32 v1, v3, v6, v1 ; D2960001 
04060D03 > v_mul_f32_e32 v3, s4, v35 ; 10064604 > v_fma_f32 v2, v3, v6, v2 ; D2960002 040A0D03 > v_mul_f32_e32 v3, s4, v14 ; 10061C04 > v_mov_b32_e32 v7, 0x3dccbb63 ; 7E0E02FF 3DCCBB63 > v_fma_f32 v0, v3, v7, v0 ; D2960000 04020F03 > v_mul_f32_e32 v3, s4, v15 ; 10061E04 > v_fma_f32 v1, v3, v7, v1 ; D2960001 04060F03 > v_mul_f32_e32 v3, s4, v36 ; 10064804 > v_fma_f32 v2, v3, v7, v2 ; D2960002 040A0F03 > v_mul_f32_e32 v3, s4, v16 ; 10062004 > v_mov_b32_e32 v8, 0x3de76692 ; 7E1002FF 3DE76692 > v_fma_f32 v0, v3, v8, v0 ; D2960000 04021103 > v_mul_f32_e32 v3, s4, v17 ; 10062204 > v_fma_f32 v1, v3, v8, v1 ; D2960001 04061103 > v_mul_f32_e32 v3, s4, v37 ; 10064A04 > v_fma_f32 v2, v3, v8, v2 ; D2960002 040A1103 > v_mul_f32_e32 v3, s4, v18 ; 10062404 > v_mov_b32_e32 v9, 0x3df10a7e ; 7E1202FF 3DF10A7E > v_fma_f32 v0, v3, v9, v0 ; D2960000 04021303 > v_mul_f32_e32 v3, s4, v19 ; 10062604 > v_fma_f32 v1, v3, v9, v1 ; D2960001 04061303 > v_mul_f32_e32 v3, s4, v38 ; 10064C04 > v_fma_f32 v2, v3, v9, v2 ; D2960002 040A1303 > v_mul_f32_e32 v3, s4, v20 ; 10062804 > v_fma_f32 v0, v3, v8, v0 ; D2960000 04021103 > v_mul_f32_e32 v3, s4, v21 ; 10062A04 > v_fma_f32 v1, v3, v8, v1 ; D2960001 04061103 > v_mul_f32_e32 v3, s4, v39 ; 10064E04 > v_fma_f32 v2, v3, v8, v2 ; D2960002 040A1103 > v_mul_f32_e32 v3, s4, v22 ; 10062C04 > v_fma_f32 v0, v3, v7, v0 ; D2960000 04020F03 > v_mul_f32_e32 v3, s4, v23 ; 10062E04 > v_fma_f32 v1, v3, v7, v1 ; D2960001 04060F03 > v_mul_f32_e32 v3, s4, v40 ; 10065004 > v_fma_f32 v2, v3, v7, v2 ; D2960002 040A0F03 > v_mul_f32_e32 v3, s4, v24 ; 10063004 > v_fma_f32 v0, v3, v6, v0 ; D2960000 04020D03 > v_mul_f32_e32 v3, s4, v25 ; 10063204 > v_fma_f32 v1, v3, v6, v1 ; D2960001 04060D03 > v_mul_f32_e32 v3, s4, v41 ; 10065204 > v_fma_f32 v2, v3, v6, v2 ; D2960002 040A0D03 > v_mul_f32_e32 v3, s4, v26 ; 10063404 > v_fma_f32 v0, v3, v5, v0 ; D2960000 04020B03 > v_mul_f32_e32 v3, s4, v27 ; 10063604 > v_fma_f32 v1, v3, v5, v1 ; D2960001 04060B03 > v_mul_f32_e32 v3, s4, v42 ; 10065404 > v_fma_f32 v2, v3, v5, v2 ; D2960002 040A0B03 > v_mul_f32_e32 v3, s4, v28 ; 10063804 > v_fma_f32 v0, v3, v4, v0 ; D2960000 04020903 > v_mul_f32_e32 v3, s4, v29 ; 10063A04 > v_fma_f32 v1, v3, v4, v1 ; D2960001 04060903 > v_mul_f32_e32 v3, s4, v43 ; 10065604 > v_fma_f32 v2, v3, v4, v2 ; D2960002 040A0903 > v_mul_f32_e32 v3, s4, v30 ; 10063C04 > v_fma_f32 v0, v3, v46, v0 ; D2960000 04025D03 > v_mul_f32_e32 v3, s4, v31 ; 10063E04 > s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D > v_fma_f32 v1, v3, v46, v1 ; D2960001 04065D03 > v_mul_f32_e32 v3, s4, v44 ; 10065804 > v_fma_f32 v2, v3, v46, v2 ; D2960002 040A5D03 > v_mul_f32_e32 v3, s4, v32 ; 10064004 > v_mul_f32_e32 v4, s4, v33 ; 10084204 > v_mul_f32_e32 v5, s4, v45 ; 100A5A04 > v_fma_f32 v1, v4, v47, v1 ; D2960001 04065F04 > v_fma_f32 v0, v3, v47, v0 ; D2960000 04025F03 > v_fma_f32 v2, v5, v47, v2 ; D2960002 040A5F05 > v_max3_f32 v3, v2, v1, v0 ; D2A80003 04020302 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s0, v3 ; 10060600 > v_mul_f32_e32 v3, 0x437f0000, v3 ; 100606FF 437F0000 > v_ceil_f32_e32 v3, v3 ; 7E064503 > v_max_f32_e32 v3, 1.0, v3 ; 200606F2 > v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 > v_mul_f32_e32 v4, s4, v3 ; 10080604 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_rcp_f32_e32 v4, v4 ; 7E085504 > v_cmp_lt_f32_e64 s[0:1], 0, v0 ; D0020000 00020080 > v_cmp_lt_f32_e64 s[2:3], 0, v1 ; D0020002 00020280 > v_cmp_lt_f32_e64 s[4:5], 0, v2 ; D0020004 00020480 > v_cndmask_b32_e64 v5, v0, 1.0, s[0:1] ; D2000005 0001E500 > v_cndmask_b32_e64 v6, v1, 1.0, 
s[2:3] ; D2000006 0009E501 > v_cndmask_b32_e64 v7, v2, 1.0, s[4:5] ; D2000007 0011E502 > v_bfrev_b32_e32 v8, 14 ; 7E10708E > v_cmp_le_f32_e64 s[0:1], 0, v5 ; D0060000 00020A80 > v_cmp_le_f32_e64 s[2:3], 0, v6 ; D0060002 00020C80 > v_cmp_le_f32_e64 s[4:5], 0, v7 ; D0060004 00020E80 > v_mul_f32_e32 v5, v8, v5 ; 100A0B08 > v_mul_f32_e32 v6, v8, v6 ; 100C0D08 > v_mul_f32_e32 v7, v8, v7 ; 100E0F08 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_cndmask_b32_e64 v5, v8, v5, s[0:1] ; D2000005 00020B08 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_cndmask_b32_e64 v6, v8, v6, s[2:3] ; D2000006 000A0D08 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_cndmask_b32_e64 v7, v8, v7, s[4:5] ; D2000007 00120F08 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 > v_cndmask_b32_e32 v1, v1, v6 ; 00020D01 > v_cndmask_b32_e32 v2, v2, v7 ; 00040F02 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 52 >Code Size: 1448 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v1, 0xc0e00000 ; 7E0202FF C0E00000 > v_mov_b32_e32 v7, 0xc0a00000 ; 7E0E02FF C0A00000 > v_mov_b32_e32 v8, 0xc0400000 ; 7E1002FF C0400000 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v5, 0xc0c00000 ; 7E0A02FF C0C00000 > v_bfrev_b32_e32 v9, 14 ; 7E12708E > v_bfrev_b32_e32 v6, 15 ; 7E0C708F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v10, s4 ; 7E145404 > v_rcp_f32_e32 v11, s0 ; 7E165400 > v_cmp_eq_f32_e64 vcc, 0, s0 ; D004006A 00000080 > v_cmp_eq_f32_e64 s[2:3], 0, s4 ; D0040002 00000880 > v_mul_f32_e32 v10, 0, v10 ; 10141480 > v_mul_f32_e32 v1, v11, v1 ; 1002030B > v_mul_f32_e32 v5, v11, v5 ; 100A0B0B > v_mul_f32_e32 v7, v11, v7 ; 100E0F0B > v_mul_f32_e32 v12, -4.0, v11 ; 101816F7 > v_mul_f32_e32 v8, v11, v8 ; 1010110B > v_mul_f32_e32 v13, -2.0, v11 ; 101A16F5 > v_xor_b32_e32 v14, 0x80000000, v11 ; 3A1C16FF 80000000 > v_cndmask_b32_e32 v15, v11, v9 ; 001E130B > v_add_f32_e32 v16, v11, v11 ; 0620170B > v_mul_f32_e32 v17, 0x40400000, v11 ; 102216FF 40400000 > v_mul_f32_e32 v18, 4.0, v11 ; 102416F6 > v_mul_f32_e32 v19, 0x40a00000, v11 ; 102616FF 40A00000 > v_mul_f32_e32 v20, 0x40c00000, v11 ; 102816FF 40C00000 > v_mul_f32_e32 v11, 0x40e00000, v11 ; 101616FF 40E00000 > v_cndmask_b32_e64 v10, v10, 0, s[2:3] ; D200000A 0009010A > v_cndmask_b32_e32 v1, v1, v6 ; 00020D01 > v_cndmask_b32_e32 v5, v5, v6 ; 000A0D05 > v_cndmask_b32_e32 v7, v7, v6 ; 000E0D07 > v_cndmask_b32_e32 v12, v12, v6 ; 00180D0C > v_cndmask_b32_e32 v8, v8, v6 ; 00100D08 > v_cndmask_b32_e32 v13, v13, v6 ; 001A0D0D > v_cndmask_b32_e32 v6, v14, v6 ; 000C0D0E > v_cndmask_b32_e32 v14, v16, v9 ; 001C1310 > v_cndmask_b32_e32 v16, v17, v9 ; 00201311 > v_cndmask_b32_e32 v17, v18, v9 ; 00221312 > v_cndmask_b32_e32 v18, v19, v9 ; 00241313 > v_cndmask_b32_e32 v19, v20, v9 ; 00261314 > v_cndmask_b32_e32 v9, v11, v9 ; 0012130B > v_mov_b32_e32 v0, 0 
; 7E000280 > v_fma_f32 v20, v4, -0.5, 0.5 ; D2960014 03C1E304 > v_fma_f32 v11, v3, 0.5, 0.5 ; D296000B 03C1E103 > v_add_f32_e32 v10, v11, v10 ; 0614150B > v_add_f32_e32 v1, v20, v1 ; 06020314 > v_add_f32_e32 v5, v20, v5 ; 060A0B14 > exp 15, 32, 0, 0, 0, v10, v1, v10, v5 ; F800020F 050A010A > v_add_f32_e32 v7, v20, v7 ; 060E0F14 > v_add_f32_e32 v12, v20, v12 ; 06181914 > v_add_f32_e32 v8, v20, v8 ; 06101114 > v_add_f32_e32 v13, v20, v13 ; 061A1B14 > exp 15, 33, 0, 0, 0, v10, v7, v10, v12 ; F800021F 0C0A070A > v_add_f32_e32 v6, v20, v6 ; 060C0D14 > exp 15, 34, 0, 0, 0, v10, v8, v10, v13 ; F800022F 0D0A080A > v_add_f32_e32 v15, v20, v15 ; 061E1F14 > v_add_f32_e32 v14, v20, v14 ; 061C1D14 > exp 15, 35, 0, 0, 0, v11, v6, v11, v20 ; F800023F 140B060B > v_add_f32_e32 v16, v20, v16 ; 06202114 > v_add_f32_e32 v17, v20, v17 ; 06222314 > exp 15, 36, 0, 0, 0, v10, v15, v10, v14 ; F800024F 0E0A0F0A > v_add_f32_e32 v18, v20, v18 ; 06242514 > v_add_f32_e32 v19, v20, v19 ; 06262714 > exp 15, 37, 0, 0, 0, v10, v16, v10, v17 ; F800025F 110A100A > v_add_f32_e32 v9, v20, v9 ; 06121314 > exp 15, 38, 0, 0, 0, v10, v18, v10, v19 ; F800026F 130A120A > exp 15, 39, 0, 0, 0, v10, v9, v0, v0 ; F800027F 0000090A > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 24 >Code Size: 412 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v8, v2, 2, 1, [m0] ; C8200602 > v_interp_p2_f32 v8, [v8], v3, 2, 1, [m0] ; C8210603 > v_interp_p1_f32 v9, v2, 3, 1, [m0] ; C8240702 > v_interp_p2_f32 v9, [v9], v3, 3, 1, [m0] ; C8250703 > v_interp_p1_f32 v10, v2, 0, 2, [m0] ; C8280802 > v_interp_p2_f32 v10, [v10], v3, 0, 2, [m0] ; C8290803 > v_interp_p1_f32 v11, v2, 1, 2, [m0] ; C82C0902 > v_interp_p2_f32 v11, [v11], v3, 1, 2, [m0] ; C82D0903 > v_interp_p1_f32 v14, v2, 2, 2, [m0] ; C8380A02 > v_interp_p2_f32 v14, [v14], v3, 2, 2, [m0] ; C8390A03 > v_interp_p1_f32 v15, v2, 3, 2, [m0] ; C83C0B02 > v_interp_p2_f32 v15, [v15], v3, 3, 2, [m0] ; C83D0B03 > v_interp_p1_f32 v16, v2, 0, 3, [m0] ; C8400C02 > v_interp_p2_f32 v16, [v16], v3, 0, 3, [m0] ; C8410C03 > v_interp_p1_f32 v17, v2, 1, 3, [m0] ; C8440D02 > v_interp_p2_f32 v17, [v17], v3, 1, 3, [m0] ; C8450D03 > v_interp_p1_f32 v18, v2, 2, 3, [m0] ; C8480E02 > v_interp_p2_f32 v18, [v18], v3, 2, 3, [m0] ; C8490E03 > v_interp_p1_f32 v19, v2, 3, 3, [m0] ; C84C0F02 > v_interp_p2_f32 v19, [v19], v3, 3, 3, [m0] ; C84D0F03 > v_interp_p1_f32 v20, v2, 0, 4, [m0] ; C8501002 > v_interp_p2_f32 v20, [v20], v3, 0, 4, [m0] ; C8511003 > v_interp_p1_f32 v21, v2, 1, 4, [m0] ; C8541102 > v_interp_p2_f32 v21, [v21], v3, 1, 4, [m0] ; C8551103 > v_interp_p1_f32 
v22, v2, 2, 4, [m0] ; C8581202 > v_interp_p2_f32 v22, [v22], v3, 2, 4, [m0] ; C8591203 > v_interp_p1_f32 v23, v2, 3, 4, [m0] ; C85C1302 > v_interp_p2_f32 v23, [v23], v3, 3, 4, [m0] ; C85D1303 > v_interp_p1_f32 v24, v2, 0, 5, [m0] ; C8601402 > v_interp_p2_f32 v24, [v24], v3, 0, 5, [m0] ; C8611403 > v_interp_p1_f32 v25, v2, 1, 5, [m0] ; C8641502 > v_interp_p2_f32 v25, [v25], v3, 1, 5, [m0] ; C8651503 > v_interp_p1_f32 v26, v2, 2, 5, [m0] ; C8681602 > v_interp_p2_f32 v26, [v26], v3, 2, 5, [m0] ; C8691603 > v_interp_p1_f32 v27, v2, 3, 5, [m0] ; C86C1702 > v_interp_p2_f32 v27, [v27], v3, 3, 5, [m0] ; C86D1703 > v_interp_p1_f32 v28, v2, 0, 6, [m0] ; C8701802 > s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C > s_load_dwordx8 s[16:23], s[4:5], 0x0 ; C0C80500 > v_interp_p2_f32 v28, [v28], v3, 0, 6, [m0] ; C8711803 > v_interp_p1_f32 v29, v2, 1, 6, [m0] ; C8741902 > v_interp_p2_f32 v29, [v29], v3, 1, 6, [m0] ; C8751903 > v_interp_p1_f32 v30, v2, 2, 6, [m0] ; C8781A02 > v_interp_p2_f32 v30, [v30], v3, 2, 6, [m0] ; C8791A03 > v_interp_p1_f32 v31, v2, 3, 6, [m0] ; C87C1B02 > v_interp_p2_f32 v31, [v31], v3, 3, 6, [m0] ; C87D1B03 > v_interp_p1_f32 v32, v2, 0, 7, [m0] ; C8801C02 > v_interp_p2_f32 v32, [v32], v3, 0, 7, [m0] ; C8811C03 > v_interp_p1_f32 v33, v2, 1, 7, [m0] ; C8841D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s23 ; 870C170C > v_interp_p2_f32 v33, [v33], v3, 1, 7, [m0] ; C8851D03 > image_sample v[2:5], v[4:5], s[16:23], s[12:15] dmask:0xf ; F0800F00 00640204 > image_sample v[34:37], v[0:1], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642200 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v2, v2, v5 ; 10040B02 > v_mul_f32_e32 v3, v3, v5 ; 10060B03 > v_mul_f32_e32 v4, v4, v5 ; 10080B04 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v34, v37 ; 10004B22 > v_mul_f32_e32 v1, v35, v37 ; 10024B23 > v_mul_f32_e32 v5, v36, v37 ; 100A4B24 > image_sample v[34:37], v[6:7], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642206 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v34, v37 ; 100C4B22 > v_mul_f32_e32 v7, v35, v37 ; 100E4B23 > v_mul_f32_e32 v12, v36, v37 ; 10184B24 > image_sample v[34:37], v[8:9], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642208 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v8, v34, v37 ; 10104B22 > v_mul_f32_e32 v9, v35, v37 ; 10124B23 > v_mul_f32_e32 v34, v36, v37 ; 10444B24 > image_sample v[35:38], v[10:11], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064230A > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v10, v35, v38 ; 10144D23 > v_mul_f32_e32 v11, v36, v38 ; 10164D24 > v_mul_f32_e32 v35, v37, v38 ; 10464D25 > image_sample v[36:39], v[14:15], s[16:23], s[12:15] dmask:0xf ; F0800F00 0064240E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v14, v36, v39 ; 101C4F24 > v_mul_f32_e32 v15, v37, v39 ; 101E4F25 > v_mul_f32_e32 v36, v38, v39 ; 10484F26 > image_sample v[37:40], v[16:17], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642510 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v16, v37, v40 ; 10205125 > v_mul_f32_e32 v17, v38, v40 ; 10225126 > v_mul_f32_e32 v37, v39, v40 ; 104A5127 > image_sample v[38:41], v[18:19], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642612 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v18, v38, v41 ; 10245326 > v_mul_f32_e32 v19, v39, v41 ; 10265327 > v_mul_f32_e32 v38, v40, v41 ; 104C5328 > image_sample v[39:42], v[20:21], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642714 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v20, v39, v42 ; 10285527 > v_mul_f32_e32 v21, v40, v42 ; 102A5528 > 
v_mul_f32_e32 v39, v41, v42 ; 104E5529 > image_sample v[40:43], v[22:23], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642816 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v22, v40, v43 ; 102C5728 > v_mul_f32_e32 v23, v41, v43 ; 102E5729 > v_mul_f32_e32 v40, v42, v43 ; 1050572A > image_sample v[41:44], v[24:25], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642918 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v24, v41, v44 ; 10305929 > v_mul_f32_e32 v25, v42, v44 ; 1032592A > v_mul_f32_e32 v41, v43, v44 ; 1052592B > image_sample v[42:45], v[26:27], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642A1A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v26, v42, v45 ; 10345B2A > v_mul_f32_e32 v27, v43, v45 ; 10365B2B > v_mul_f32_e32 v42, v44, v45 ; 10545B2C > image_sample v[43:46], v[28:29], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642B1C > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v28, v43, v46 ; 10385D2B > v_mul_f32_e32 v29, v44, v46 ; 103A5D2C > v_mul_f32_e32 v43, v45, v46 ; 10565D2D > image_sample v[44:47], v[30:31], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642C1E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v30, v44, v47 ; 103C5F2C > v_mul_f32_e32 v31, v45, v47 ; 103E5F2D > v_mul_f32_e32 v44, v46, v47 ; 10585F2E > image_sample v[45:48], v[32:33], s[16:23], s[12:15] dmask:0xf ; F0800F00 00642D20 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v33, v46, v48 ; 1042612E > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v2, s4, v2 ; 10040404 > v_mov_b32_e32 v46, 0x3cddd257 ; 7E5C02FF 3CDDD257 > v_mul_f32_e32 v32, v45, v48 ; 1040612D > v_mul_f32_e32 v45, v47, v48 ; 105A612F > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_mul_f32_e32 v0, s4, v0 ; 10000004 > v_mov_b32_e32 v47, 0x3c827c43 ; 7E5E02FF 3C827C43 > v_fma_f32 v0, v0, v47, v2 ; D2960000 040A5F00 > v_mul_f32_e32 v2, s4, v3 ; 10040604 > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_mul_f32_e32 v1, s4, v1 ; 10020204 > v_fma_f32 v1, v1, v47, v2 ; D2960001 040A5F01 > v_mul_f32_e32 v2, s4, v4 ; 10040804 > v_mul_f32_e32 v3, s4, v5 ; 10060A04 > v_mul_f32_e32 v2, v46, v2 ; 1004052E > v_fma_f32 v2, v3, v47, v2 ; D2960002 040A5F03 > v_mul_f32_e32 v3, s4, v6 ; 10060C04 > v_mov_b32_e32 v4, 0x3d2dc3f0 ; 7E0802FF 3D2DC3F0 > v_fma_f32 v0, v3, v4, v0 ; D2960000 04020903 > v_mul_f32_e32 v3, s4, v7 ; 10060E04 > v_fma_f32 v1, v3, v4, v1 ; D2960001 04060903 > v_mul_f32_e32 v3, s4, v12 ; 10061804 > v_fma_f32 v2, v3, v4, v2 ; D2960002 040A0903 > v_mul_f32_e32 v3, s4, v8 ; 10061004 > v_mov_b32_e32 v5, 0x3d7ae64e ; 7E0A02FF 3D7AE64E > v_fma_f32 v0, v3, v5, v0 ; D2960000 04020B03 > v_mul_f32_e32 v3, s4, v9 ; 10061204 > v_fma_f32 v1, v3, v5, v1 ; D2960001 04060B03 > v_mul_f32_e32 v3, s4, v34 ; 10064404 > v_fma_f32 v2, v3, v5, v2 ; D2960002 040A0B03 > v_mul_f32_e32 v3, s4, v10 ; 10061404 > v_mov_b32_e32 v6, 0x3da6f006 ; 7E0C02FF 3DA6F006 > v_fma_f32 v0, v3, v6, v0 ; D2960000 04020D03 > v_mul_f32_e32 v3, s4, v11 ; 10061604 > v_fma_f32 v1, v3, v6, v1 ; D2960001 04060D03 > v_mul_f32_e32 v3, s4, v35 ; 10064604 > v_fma_f32 v2, v3, v6, v2 ; D2960002 040A0D03 > v_mul_f32_e32 v3, s4, v14 ; 10061C04 > v_mov_b32_e32 v7, 0x3dccbb63 ; 7E0E02FF 3DCCBB63 > v_fma_f32 v0, v3, v7, v0 ; D2960000 04020F03 > v_mul_f32_e32 v3, s4, v15 ; 10061E04 > v_fma_f32 v1, v3, v7, v1 ; D2960001 04060F03 > v_mul_f32_e32 v3, s4, v36 ; 10064804 > v_fma_f32 v2, v3, v7, v2 ; D2960002 040A0F03 > v_mul_f32_e32 v3, s4, v16 ; 10062004 > v_mov_b32_e32 v8, 0x3de76692 ; 7E1002FF 3DE76692 > v_fma_f32 v0, v3, v8, v0 ; D2960000 04021103 > 
v_mul_f32_e32 v3, s4, v17 ; 10062204 > v_fma_f32 v1, v3, v8, v1 ; D2960001 04061103 > v_mul_f32_e32 v3, s4, v37 ; 10064A04 > v_fma_f32 v2, v3, v8, v2 ; D2960002 040A1103 > v_mul_f32_e32 v3, s4, v18 ; 10062404 > v_mov_b32_e32 v9, 0x3df10a7e ; 7E1202FF 3DF10A7E > v_fma_f32 v0, v3, v9, v0 ; D2960000 04021303 > v_mul_f32_e32 v3, s4, v19 ; 10062604 > v_fma_f32 v1, v3, v9, v1 ; D2960001 04061303 > v_mul_f32_e32 v3, s4, v38 ; 10064C04 > v_fma_f32 v2, v3, v9, v2 ; D2960002 040A1303 > v_mul_f32_e32 v3, s4, v20 ; 10062804 > v_fma_f32 v0, v3, v8, v0 ; D2960000 04021103 > v_mul_f32_e32 v3, s4, v21 ; 10062A04 > v_fma_f32 v1, v3, v8, v1 ; D2960001 04061103 > v_mul_f32_e32 v3, s4, v39 ; 10064E04 > v_fma_f32 v2, v3, v8, v2 ; D2960002 040A1103 > v_mul_f32_e32 v3, s4, v22 ; 10062C04 > v_fma_f32 v0, v3, v7, v0 ; D2960000 04020F03 > v_mul_f32_e32 v3, s4, v23 ; 10062E04 > v_fma_f32 v1, v3, v7, v1 ; D2960001 04060F03 > v_mul_f32_e32 v3, s4, v40 ; 10065004 > v_fma_f32 v2, v3, v7, v2 ; D2960002 040A0F03 > v_mul_f32_e32 v3, s4, v24 ; 10063004 > v_fma_f32 v0, v3, v6, v0 ; D2960000 04020D03 > v_mul_f32_e32 v3, s4, v25 ; 10063204 > v_fma_f32 v1, v3, v6, v1 ; D2960001 04060D03 > v_mul_f32_e32 v3, s4, v41 ; 10065204 > v_fma_f32 v2, v3, v6, v2 ; D2960002 040A0D03 > v_mul_f32_e32 v3, s4, v26 ; 10063404 > v_fma_f32 v0, v3, v5, v0 ; D2960000 04020B03 > v_mul_f32_e32 v3, s4, v27 ; 10063604 > v_fma_f32 v1, v3, v5, v1 ; D2960001 04060B03 > v_mul_f32_e32 v3, s4, v42 ; 10065404 > v_fma_f32 v2, v3, v5, v2 ; D2960002 040A0B03 > v_mul_f32_e32 v3, s4, v28 ; 10063804 > v_fma_f32 v0, v3, v4, v0 ; D2960000 04020903 > v_mul_f32_e32 v3, s4, v29 ; 10063A04 > v_fma_f32 v1, v3, v4, v1 ; D2960001 04060903 > v_mul_f32_e32 v3, s4, v43 ; 10065604 > v_fma_f32 v2, v3, v4, v2 ; D2960002 040A0903 > v_mul_f32_e32 v3, s4, v30 ; 10063C04 > v_fma_f32 v0, v3, v46, v0 ; D2960000 04025D03 > v_mul_f32_e32 v3, s4, v31 ; 10063E04 > s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D > v_fma_f32 v1, v3, v46, v1 ; D2960001 04065D03 > v_mul_f32_e32 v3, s4, v44 ; 10065804 > v_fma_f32 v2, v3, v46, v2 ; D2960002 040A5D03 > v_mul_f32_e32 v3, s4, v32 ; 10064004 > v_mul_f32_e32 v4, s4, v33 ; 10084204 > v_mul_f32_e32 v5, s4, v45 ; 100A5A04 > v_fma_f32 v1, v4, v47, v1 ; D2960001 04065F04 > v_fma_f32 v0, v3, v47, v0 ; D2960000 04025F03 > v_fma_f32 v2, v5, v47, v2 ; D2960002 040A5F05 > v_max3_f32 v3, v2, v1, v0 ; D2A80003 04020302 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s0, v3 ; 10060600 > v_mul_f32_e32 v3, 0x437f0000, v3 ; 100606FF 437F0000 > v_ceil_f32_e32 v3, v3 ; 7E064503 > v_max_f32_e32 v3, 1.0, v3 ; 200606F2 > v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 > v_mul_f32_e32 v4, s4, v3 ; 10080604 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_rcp_f32_e32 v4, v4 ; 7E085504 > v_cmp_lt_f32_e64 s[0:1], 0, v0 ; D0020000 00020080 > v_cmp_lt_f32_e64 s[2:3], 0, v1 ; D0020002 00020280 > v_cmp_lt_f32_e64 s[4:5], 0, v2 ; D0020004 00020480 > v_cndmask_b32_e64 v5, v0, 1.0, s[0:1] ; D2000005 0001E500 > v_cndmask_b32_e64 v6, v1, 1.0, s[2:3] ; D2000006 0009E501 > v_cndmask_b32_e64 v7, v2, 1.0, s[4:5] ; D2000007 0011E502 > v_bfrev_b32_e32 v8, 14 ; 7E10708E > v_cmp_le_f32_e64 s[0:1], 0, v5 ; D0060000 00020A80 > v_cmp_le_f32_e64 s[2:3], 0, v6 ; D0060002 00020C80 > v_cmp_le_f32_e64 s[4:5], 0, v7 ; D0060004 00020E80 > v_mul_f32_e32 v5, v8, v5 ; 100A0B08 > v_mul_f32_e32 v6, v8, v6 ; 100C0D08 > v_mul_f32_e32 v7, v8, v7 ; 100E0F08 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_cndmask_b32_e64 v5, v8, v5, s[0:1] ; D2000005 00020B08 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > 
v_cndmask_b32_e64 v6, v8, v6, s[2:3] ; D2000006 000A0D08 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_cndmask_b32_e64 v7, v8, v7, s[4:5] ; D2000007 00120F08 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 > v_cndmask_b32_e32 v1, v1, v6 ; 00020D01 > v_cndmask_b32_e32 v2, v2, v7 ; 00040F02 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 52 >Code Size: 1448 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 4 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C > s_load_dwordx8 s[32:39], s[4:5], 0x0 ; C0D00500 > s_load_dwordx4 s[16:19], s[4:5], 0x1c ; C088051C > s_load_dwordx8 s[40:47], s[4:5], 0x10 ; C0D40510 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 > s_load_dwordx4 s[20:23], s[4:5], 0x2c ; C08A052C > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_and_b32 s12, s12, s39 ; 870C270C > image_sample v[3:6], v[0:1], s[32:39], s[12:15] dmask:0xf ; F0800F00 00680300 > s_and_b32 s16, s16, s47 ; 87102F10 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v3, v6 ; 10060D03 > v_mul_f32_e32 v4, v4, v6 ; 10080D04 > v_mul_f32_e32 v5, v5, v6 ; 100A0D05 > image_sample v[7:10], v[0:1], s[40:47], s[16:19] dmask:0xf ; F0800F00 008A0700 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v7, v10 ; 100C1507 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s6, v3 ; 10060606 > v_mul_f32_e32 v7, v8, v10 ; 100E1508 > v_mul_f32_e32 v4, s6, v4 ; 10080806 > v_mul_f32_e32 v8, v9, v10 ; 10101509 > v_mul_f32_e32 v5, s6, v5 ; 100A0A06 > s_and_b32 s20, s20, s31 ; 87141F14 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > v_fma_f32 v3, v6, s6, v3 ; D2960003 040C0D06 > v_fma_f32 v4, v7, s6, v4 ; D2960004 04100D07 > v_fma_f32 v5, v8, s6, v5 ; D2960005 04140D08 > image_sample v[6:9], v[0:1], s[24:31], s[20:23] dmask:0xf ; F0800F00 00A60600 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v6, v9 ; 10001306 > v_mul_f32_e32 v1, v7, v9 ; 10021307 > v_mul_f32_e32 v6, v8, v9 ; 100C1308 > v_fma_f32 v0, v0, s6, v3 ; D2960000 040C0D00 > v_mov_b32_e32 v2, 
0x3eaaaaab ; 7E0402FF 3EAAAAAB > v_fma_f32 v1, v1, s6, v4 ; D2960001 04100D01 > v_fma_f32 v3, v6, s6, v5 ; D2960003 04140D06 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_mul_f32_e32 v2, v2, v3 ; 10040702 > v_max3_f32 v3, v2, v1, v0 ; D2A80003 04020302 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v3, s0, v3 ; 10060600 > v_mul_f32_e32 v3, 0x437f0000, v3 ; 100606FF 437F0000 > v_ceil_f32_e32 v3, v3 ; 7E064503 > v_max_f32_e32 v3, 1.0, v3 ; 200606F2 > v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 > v_mul_f32_e32 v4, s6, v3 ; 10080606 > v_cmp_lt_f32_e64 s[0:1], 0, v0 ; D0020000 00020080 > v_cndmask_b32_e64 v5, v0, 1.0, s[0:1] ; D2000005 0001E500 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_cmp_lt_f32_e64 s[2:3], 0, v1 ; D0020002 00020280 > v_rcp_f32_e32 v4, v4 ; 7E085504 > v_bfrev_b32_e32 v8, 14 ; 7E10708E > v_cndmask_b32_e64 v6, v1, 1.0, s[2:3] ; D2000006 0009E501 > v_cmp_le_f32_e64 s[0:1], 0, v5 ; D0060000 00020A80 > v_cmp_lt_f32_e64 s[2:3], 0, v2 ; D0020002 00020480 > v_mul_f32_e32 v5, v8, v5 ; 100A0B08 > v_bfrev_b32_e32 v9, 15 ; 7E12708F > v_cndmask_b32_e64 v7, v2, 1.0, s[2:3] ; D2000007 0009E502 > v_cndmask_b32_e64 v5, v9, v5, s[0:1] ; D2000005 00020B09 > v_cmp_le_f32_e64 s[0:1], 0, v6 ; D0060000 00020C80 > v_mul_f32_e32 v6, v8, v6 ; 100C0D08 > v_cndmask_b32_e64 v6, v9, v6, s[0:1] ; D2000006 00020D09 > v_cmp_le_f32_e64 s[0:1], 0, v7 ; D0060000 00020E80 > v_mul_f32_e32 v7, v8, v7 ; 100E0F08 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_cndmask_b32_e64 v7, v9, v7, s[0:1] ; D2000007 00020F09 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 > v_cndmask_b32_e32 v1, v1, v6 ; 00020D01 > v_cndmask_b32_e32 v2, v2, v7 ; 00040F02 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 15 >Code Size: 444 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > s_load_dwordx8 s[28:35], s[4:5], 0x10 ; C0CE0510 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 > v_interp_p2_f32 v0, 
[v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C > s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D > s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_and_b32 s24, s24, s19 ; 87181318 > image_sample v[2:5], v[0:1], s[12:19], s[24:27] dmask:0xf ; F0800F00 00C30200 > s_and_b32 s20, s20, s35 ; 87142314 > image_sample v[6:8], v[0:1], s[28:35], s[20:23] dmask:0x7 ; F0800700 00A70600 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v0, v2, v5 ; 10000B02 > v_mul_f32_e32 v1, v3, v5 ; 10020B03 > v_mul_f32_e32 v2, v4, v5 ; 10040B04 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v0, s4, v0 ; 10000004 > v_mul_f32_e32 v1, s4, v1 ; 10020204 > v_mul_f32_e32 v2, s4, v2 ; 10040404 > v_mul_f32_e32 v3, s0, v0 ; 10060000 > v_mul_f32_e32 v4, s0, v1 ; 10080200 > v_mul_f32_e32 v5, s0, v2 ; 100A0400 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_max_f32_e32 v3, v6, v3 ; 20060706 > v_max_f32_e32 v4, v7, v4 ; 20080907 > v_max_f32_e32 v5, v8, v5 ; 200A0B08 > v_fma_f32 v0, v0, s6, v3 ; D2960000 040C0D00 > v_fma_f32 v1, v1, s6, v4 ; D2960001 04100D01 > v_fma_f32 v2, v2, s6, v5 ; D2960002 04140D02 > v_mov_b32_e32 v3, s5 ; 7E060205 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 40 >VGPRS: 15 >Code Size: 196 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A > s_buffer_load_dword s0, s[0:3], 0x1b ; C200011B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v6, v3, 0.5, 0.5 ; D2960006 03C1E103 > v_fma_f32 v7, v4, -0.5, 0.5 ; D2960007 03C1E304 > v_add_f32_e32 v1, 0, v3 ; 06020680 > v_add_f32_e32 v5, 0, v4 ; 060A0880 > exp 15, 32, 0, 0, 0, v1, v5, v6, v7 ; F800020F 07060501 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_subrev_f32_e32 v8, s4, v6 ; 0A100C04 > v_subrev_f32_e32 v9, s0, v7 ; 0A120E00 > v_add_f32_e32 v10, s4, v6 ; 06140C04 > v_add_f32_e32 v11, s0, v7 ; 06160E00 > exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 12 >Code Size: 124 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[24:27], s[2:3], 0x4 ; C08C0304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 2, 0, [m0] ; C8000202 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s0, s[24:27], 0x2c ; C200192C > v_interp_p2_f32 v0, [v0], v3, 2, 0, [m0] ; C8010203 > v_interp_p1_f32 v1, v2, 3, 0, [m0] ; C8040302 > v_interp_p2_f32 v1, [v1], v3, 3, 0, [m0] ; 
C8050303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v9, v2, 1, 1, [m0] ; C8240502 > v_interp_p2_f32 v9, [v9], v3, 1, 1, [m0] ; C8250503 > v_interp_p1_f32 v4, v2, 2, 1, [m0] ; C8100602 > v_interp_p2_f32 v4, [v4], v3, 2, 1, [m0] ; C8110603 > v_interp_p1_f32 v5, v2, 3, 1, [m0] ; C8140702 > s_and_b32 s20, s20, s19 ; 87141314 > v_mov_b32_e32 v7, v9 ; 7E0E0309 > v_interp_p2_f32 v5, [v5], v3, 3, 1, [m0] ; C8150703 > image_sample v3, v[6:7], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30306 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v10, s0 ; 7E140200 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v3 ; 7C040680 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[6:7], 0, s0 ; D0020006 00000080 > v_cndmask_b32_e64 v2, v10, 1.0, s[6:7] ; D2000002 0019E50A > v_cmp_le_f32_e32 vcc, 0, v2 ; 7C060480 > v_mul_f32_e32 v2, 0x70000000, v2 ; 100404FF 70000000 > v_bfrev_b32_e32 v7, 15 ; 7E0E708F > v_cndmask_b32_e32 v2, v7, v2 ; 00040507 > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v2, v3 ; 7E045503 > v_mul_f32_e32 v2, s0, v2 ; 10040400 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > image_sample v7, v[4:5], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30704 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v7 ; 7C040E80 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[6:7], 0, s0 ; D0020006 00000080 > v_cndmask_b32_e64 v3, v10, 1.0, s[6:7] ; D2000003 0019E50A > v_cmp_le_f32_e32 vcc, 0, v3 ; 7C060680 > v_mul_f32_e32 v3, 0x70000000, v3 ; 100606FF 70000000 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_cndmask_b32_e32 v3, v8, v3 ; 00060708 > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v3, v7 ; 7E065507 > v_mul_f32_e32 v3, s0, v3 ; 10060600 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > image_sample v7, v[0:1], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30700 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v7 ; 7C040E80 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[6:7], 0, s0 ; D0020006 00000080 > v_cndmask_b32_e64 v8, v10, 1.0, s[6:7] ; D2000008 0019E50A > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_mul_f32_e32 v8, 0x70000000, v8 ; 101010FF 70000000 > v_bfrev_b32_e32 v11, 15 ; 7E16708F > v_cndmask_b32_e32 v11, v11, v8 ; 0016110B > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v7, v7 ; 7E0E5507 > v_mul_f32_e32 v11, s0, v7 ; 10160E00 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > v_mov_b32_e32 v8, v4 ; 7E100304 > image_sample v7, v[8:9], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30708 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v7 ; 7C040E80 > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[6:7], 0, s0 ; D0020006 00000080 > v_cndmask_b32_e64 v8, v10, 1.0, s[6:7] ; D2000008 0019E50A > v_cmp_le_f32_e32 vcc, 0, v8 ; 7C061080 > v_mul_f32_e32 v8, 0x70000000, v8 ; 101010FF 70000000 > v_bfrev_b32_e32 v12, 15 ; 7E18708F > v_cndmask_b32_e32 v8, v12, v8 ; 0010110C > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v7, v7 ; 7E0E5507 > v_mul_f32_e32 v8, s0, v7 ; 10100E00 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > s_buffer_load_dword s8, s[24:27], 0x1a ; C204191A > 
s_buffer_load_dword s9, s[24:27], 0x1b ; C204991B > v_mov_b32_e32 v7, v5 ; 7E0E0305 > image_sample v7, v[6:7], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30706 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_eq_f32_e32 vcc, 0, v7 ; 7C040E80 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_saveexec_b64 s[2:3], vcc ; BE82246A > s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E > v_cmp_lt_f32_e64 s[6:7], 0, s0 ; D0020006 00000080 > v_cndmask_b32_e64 v10, v10, 1.0, s[6:7] ; D200000A 0019E50A > v_cmp_le_f32_e32 vcc, 0, v10 ; 7C061480 > v_mul_f32_e32 v10, 0x70000000, v10 ; 101414FF 70000000 > v_bfrev_b32_e32 v12, 15 ; 7E18708F > v_cndmask_b32_e32 v12, v12, v10 ; 0018150C > s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 > s_load_dwordx4 s[12:15], s[4:5], 0x1c ; C086051C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_xor_b64 exec, exec, s[2:3] ; 89FE027E > v_rcp_f32_e32 v7, v7 ; 7E0E5507 > v_mul_f32_e32 v12, s0, v7 ; 10180E00 > s_or_b64 exec, exec, s[2:3] ; 88FE027E > s_load_dwordx8 s[0:7], s[4:5], 0x10 ; C0C00510 > v_add_f32_e32 v2, v2, v3 ; 06040702 > v_add_f32_e32 v3, v8, v12 ; 06061908 > v_fma_f32 v2, -v11, 2.0, v2 ; D2960002 2409E90B > v_fma_f32 v3, -v11, 2.0, v3 ; D2960003 240DE90B > v_add_f32_e64 v2, |v3|, |v2| ; D2060302 00020503 > v_mov_b32_e32 v3, 0x461c4000 ; 7E0602FF 461C4000 > v_fma_f32 v2, -v2, v3, 1.0 ; D2960002 23CA0702 > v_mov_b32_e32 v15, v5 ; 7E1E0305 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s7 ; 870C070C > v_mov_b32_e32 v7, v9 ; 7E0E0309 > v_max_f32_e32 v2, 0, v2 ; 20040480 > v_mov_b32_e32 v3, 0x3e000000 ; 7E0602FF 3E000000 > v_mov_b32_e32 v14, v4 ; 7E1C0304 > v_mov_b32_e32 v15, v9 ; 7E1E0309 > image_sample v[10:12], v[6:7], s[0:7], s[12:15] dmask:0x7 ; F0800700 00600A06 > v_fma_f32 v2, 4.0, v2, v3 ; D2960002 040E04F6 > v_mov_b32_e32 v3, 0x3e991687 ; 7E0602FF 3E991687 > image_sample v[14:16], v[14:15], s[0:7], s[12:15] dmask:0x7 ; F0800700 00600E0E > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v8, v3, v10 ; 10101503 > v_mov_b32_e32 v10, 0x3f1645a2 ; 7E1402FF 3F1645A2 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, v3, v14 ; 10121D03 > v_mac_f32_e32 v8, v10, v11 ; 3E10170A > v_mov_b32_e32 v11, 0x3de978d5 ; 7E1602FF 3DE978D5 > v_mac_f32_e32 v9, v10, v15 ; 3E121F0A > v_mov_b32_e32 v7, v5 ; 7E0E0305 > v_mac_f32_e32 v9, v11, v16 ; 3E12210B > image_sample v[14:16], v[6:7], s[0:7], s[12:15] dmask:0x7 ; F0800700 00600E06 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v6, v3, v14 ; 100C1D03 > v_mac_f32_e32 v6, v10, v15 ; 3E0C1F0A > v_mac_f32_e32 v6, v11, v16 ; 3E0C210B > image_sample v[14:16], v[4:5], s[0:7], s[12:15] dmask:0x7 ; F0800700 00600E04 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v4, v3, v14 ; 10081D03 > v_mac_f32_e32 v8, v11, v12 ; 3E10190B > v_mac_f32_e32 v4, v10, v15 ; 3E081F0A > v_add_f32_e32 v5, v9, v8 ; 060A1109 > v_mac_f32_e32 v4, v11, v16 ; 3E08210B > v_add_f32_e32 v7, v6, v5 ; 060E0B06 > v_add_f32_e32 v7, v4, v7 ; 060E0F04 > v_mul_f32_e32 v2, v2, v7 ; 10040F02 > v_mov_b32_e32 v7, 0x3e800000 ; 7E0E02FF 3E800000 > v_add_f32_e32 v12, v6, v4 ; 06180906 > v_add_f32_e32 v14, v8, v6 ; 061C0D08 > v_add_f32_e32 v15, v9, v4 ; 061E0909 > v_mul_f32_e32 v2, v7, v2 ; 10040507 > v_subrev_f32_e32 v5, v12, v5 ; 0A0A0B0C > v_subrev_f32_e32 v12, v15, v14 ; 0A181D0F > v_max_f32_e32 v2, 0x3b800000, v2 ; 200404FF 3B800000 > v_min_f32_e64 v14, |v5|, |v12| ; D21E030E 00021905 > v_add_f32_e32 v2, v14, v2 ; 0604050E > v_rcp_f32_e32 v14, v2 ; 7E1C5502 > v_cmp_neq_f32_e32 vcc, 0, v2 ; 7C1A0480 > v_bfrev_b32_e32 v2, 14 ; 7E04708E > v_cndmask_b32_e32 v2, v2, v14 ; 00041D02 > v_mul_f32_e32 v5, v2, v5 ; 
100A0B02 > v_mul_f32_e32 v2, v12, v2 ; 1004050C > v_med3_f32 v5, -v5, -2.0, 2.0 ; D2AE0005 23D1EB05 > v_med3_f32 v2, v2, -2.0, 2.0 ; D2AE0002 03D1EB02 > v_mul_f32_e32 v5, s8, v5 ; 100A0A08 > v_mul_f32_e32 v2, s9, v2 ; 10040409 > v_fma_f32 v14, v5, -0.5, v0 ; D296000E 0401E305 > v_fma_f32 v15, v2, -0.5, v1 ; D296000F 0405E302 > v_fma_f32 v16, v5, 0.5, v0 ; D2960010 0401E105 > v_fma_f32 v17, v2, 0.5, v1 ; D2960011 0405E102 > image_sample v[16:19], v[16:17], s[0:7], s[12:15] dmask:0xf ; F0800F00 00601010 > image_sample v[20:23], v[14:15], s[0:7], s[12:15] dmask:0xf ; F0800F00 0060140E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v14, v21, v17 ; 061C2315 > v_mov_b32_e32 v17, 0xbe2aaaab ; 7E2202FF BE2AAAAB > v_add_f32_e32 v12, v20, v16 ; 06182114 > v_add_f32_e32 v15, v22, v18 ; 061E2516 > v_add_f32_e32 v16, v23, v19 ; 06202717 > v_fma_f32 v18, v5, v17, v0 ; D2960012 04022305 > v_fma_f32 v19, v2, v17, v1 ; D2960013 04062302 > v_mov_b32_e32 v17, 0x3e2aaaab ; 7E2202FF 3E2AAAAB > v_fma_f32 v20, v5, v17, v0 ; D2960014 04022305 > v_fma_f32 v21, v2, v17, v1 ; D2960015 04062302 > image_sample v[22:25], v[18:19], s[0:7], s[12:15] dmask:0xf ; F0800F00 00601612 > image_sample v[17:20], v[20:21], s[0:7], s[12:15] dmask:0xf ; F0800F00 00601114 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v2, v22, v17 ; 06042316 > v_add_f32_e32 v5, v23, v18 ; 060A2517 > v_add_f32_e32 v17, v24, v19 ; 06222718 > v_add_f32_e32 v18, v25, v20 ; 06242919 > image_sample v[19:21], v[0:1], s[0:7], s[12:15] dmask:0x7 ; F0800700 00601300 > v_mul_f32_e32 v1, v7, v12 ; 10021907 > v_mul_f32_e32 v12, v7, v14 ; 10181D07 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v3, v19 ; 10002703 > v_fma_f32 v1, v2, v7, v1 ; D2960001 04060F02 > v_mul_f32_e32 v14, v7, v15 ; 101C1F07 > v_mac_f32_e32 v0, v10, v20 ; 3E00290A > v_fma_f32 v12, v5, v7, v12 ; D296000C 04320F05 > v_mul_f32_e32 v3, v3, v1 ; 10060303 > v_mac_f32_e32 v3, v10, v12 ; 3E06190A > v_fma_f32 v14, v17, v7, v14 ; D296000E 043A0F11 > v_min_f32_e32 v10, v8, v9 ; 1E141308 > v_max_f32_e32 v8, v8, v9 ; 20101308 > v_min_f32_e32 v9, v6, v4 ; 1E120906 > v_mac_f32_e32 v0, v11, v21 ; 3E002B0B > v_max_f32_e32 v4, v6, v4 ; 20080906 > v_min3_f32 v6, v9, v10, v0 ; D2A20006 04021509 > v_mac_f32_e32 v3, v11, v14 ; 3E061D0B > v_max3_f32 v0, v4, v8, v0 ; D2A80000 04021104 > v_mul_f32_e32 v15, v7, v16 ; 101E2107 > v_cmp_lt_f32_e32 vcc, v0, v3 ; 7C020700 > v_cmp_lt_f32_e64 s[0:1], v3, v6 ; D0020000 00020D03 > v_fma_f32 v7, v18, v7, v15 ; D2960007 043E0F12 > v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 > s_or_b64 vcc, vcc, s[0:1] ; 88EA006A > v_mul_f32_e32 v10, 0.5, v18 ; 101424F0 > v_mul_f32_e32 v5, 0.5, v5 ; 100A0AF0 > v_mul_f32_e32 v9, 0.5, v17 ; 101222F0 > v_cndmask_b32_e32 v0, v1, v2 ; 00000501 > v_cndmask_b32_e32 v1, v12, v5 ; 00020B0C > v_cndmask_b32_e32 v2, v14, v9 ; 0004130E > v_cndmask_b32_e32 v3, v7, v10 ; 00061507 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 28 >Code Size: 1192 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; 
E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v2, s6 ; 7E045406 > v_rcp_f32_e32 v3, s0 ; 7E065400 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_bfrev_b32_e32 v4, 14 ; 7E08708E > v_cmp_eq_f32_e64 vcc, 0, s6 ; D004006A 00000C80 > v_cndmask_b32_e64 v3, v3, v4, s[0:1] ; D2000003 00020903 > v_cndmask_b32_e32 v2, v2, v4 ; 00040902 > v_subrev_f32_e32 v5, v3, v1 ; 0A0A0303 > s_and_b32 s20, s20, s19 ; 87141314 > v_subrev_f32_e32 v4, v2, v0 ; 0A080102 > v_add_f32_e32 v10, v0, v2 ; 06140500 > v_add_f32_e32 v0, v1, v3 ; 06000701 > v_mov_b32_e32 v11, v5 ; 7E160305 > image_sample v[6:9], v[4:5], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30604 > image_sample v[14:17], v[10:11], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30E0A > v_mov_b32_e32 v5, v0 ; 7E0A0300 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v1, v14, v6 ; 06020D0E > v_add_f32_e32 v2, v15, v7 ; 06040F0F > v_add_f32_e32 v3, v16, v8 ; 06061110 > v_add_f32_e32 v6, v17, v9 ; 060C1311 > image_sample v[14:17], v[4:5], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30E04 > v_mov_b32_e32 v11, v0 ; 7E160300 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v4, v6, v17 ; 06082306 > v_add_f32_e32 v1, v1, v14 ; 06021D01 > image_sample v[5:8], v[10:11], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A3050A > v_add_f32_e32 v2, v2, v15 ; 06041F02 > v_add_f32_e32 v3, v3, v16 ; 06062103 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v0, v1, v5 ; 06000B01 > v_add_f32_e32 v1, v2, v6 ; 06020D02 > v_add_f32_e32 v2, v3, v7 ; 06040F03 > v_add_f32_e32 v3, v4, v8 ; 06061104 > v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 > v_mul_f32_e32 v0, v4, v0 ; 10000104 > v_mul_f32_e32 v1, v4, v1 ; 10020304 > v_mul_f32_e32 v2, v4, v2 ; 10040504 > v_mul_f32_e32 v3, v4, v3 ; 10060704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 20 >Code Size: 260 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F 
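; annotation (not produced by the compiler; an interpretation of the listing):
; the prolog above adds what appears to be the base-vertex SGPR (s12) to the
; vertex index in v0, and the buffer_load_format_xyzw that follows fetches
; vertex attribute 0 through the descriptor just loaded into s[0:3].  The two
; v_fma instructions after the fetch compute uv = pos.xy * (0.5, -0.5) + 0.5
; and export it as PARAM0 (exp target 32), while the raw position goes out as
; POS0 (exp target 12) with z = 0, w = 1 - the usual shape of a full-screen
; post-processing pass vertex shader.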
> buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 > s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 > s_buffer_load_dword s0, s[0:3], 0x19 ; C2000119 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_and_b32 s20, s20, s19 ; 87141314 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > image_sample v0, v[0:1], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30000 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v1, s0 ; 7E020200 > v_cndmask_b32_e32 v0, v1, v0 ; 00000101 > v_sub_f32_e32 v0, s6, v0 ; 08000006 > v_mul_f32_e32 v0, s7, v0 ; 10000007 > v_add_f32_e64 v3, 0, v0 clamp ; D2060803 00020080 > v_mov_b32_e32 v0, 0 ; 7E000280 > v_mov_b32_e32 v1, 0 ; 7E020280 > v_mov_b32_e32 v2, 0 ; 7E040280 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 15 >Code Size: 132 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 > s_buffer_load_dword s0, s[0:3], 0x15 ; C2000115 > 
v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[4:7], s[4:5], 0xc ; C082050C > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v2, s8 ; 7E045408 > v_rcp_f32_e32 v3, s0 ; 7E065400 > v_cmp_eq_f32_e64 vcc, 0, s8 ; D004006A 00001080 > v_bfrev_b32_e32 v4, 14 ; 7E08708E > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_cndmask_b32_e32 v2, v2, v4 ; 00040902 > v_cndmask_b32_e64 v3, v3, v4, s[0:1] ; D2000003 00020903 > v_mov_b32_e32 v4, 0xbf0c52b9 ; 7E0802FF BF0C52B9 > v_fma_f32 v8, v3, v4, v1 ; D2960008 04060903 > v_fma_f32 v5, v2, v4, v0 ; D2960005 04020902 > v_mov_b32_e32 v4, 0x3f0c52b9 ; 7E0802FF 3F0C52B9 > v_fma_f32 v7, v2, v4, v0 ; D2960007 04020902 > s_and_b32 s4, s4, s19 ; 87041304 > v_fma_f32 v0, v3, v4, v1 ; D2960000 04060903 > image_sample v[1:4], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230107 > v_mov_b32_e32 v9, 0x3e800000 ; 7E1202FF 3E800000 > v_mov_b32_e32 v6, v8 ; 7E0C0308 > image_sample v[14:17], v[5:6], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230E05 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v1, v9, v1 ; 10020309 > v_mul_f32_e32 v2, v9, v2 ; 10040509 > v_mul_f32_e32 v3, v9, v3 ; 10060709 > v_mul_f32_e32 v4, v9, v4 ; 10080909 > v_mov_b32_e32 v6, v0 ; 7E0C0300 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v14, v9, v1 ; D2960001 0406130E > v_fma_f32 v2, v15, v9, v2 ; D2960002 040A130F > v_fma_f32 v3, v16, v9, v3 ; D2960003 040E1310 > v_fma_f32 v4, v17, v9, v4 ; D2960004 04121311 > image_sample v[14:17], v[5:6], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230E05 > v_mov_b32_e32 v8, v0 ; 7E100300 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v14, v9, v1 ; D2960000 0406130E > image_sample v[5:8], v[7:8], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230507 > v_fma_f32 v1, v15, v9, v2 ; D2960001 040A130F > v_fma_f32 v2, v16, v9, v3 ; D2960002 040E1310 > v_fma_f32 v3, v17, v9, v4 ; D2960003 04121311 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v5, v9, v0 ; D2960000 04021305 > v_fma_f32 v1, v6, v9, v1 ; D2960001 04061306 > v_fma_f32 v2, v7, v9, v2 ; D2960002 040A1307 > v_fma_f32 v3, v8, v9, v3 ; D2960003 040E1308 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 20 >Code Size: 344 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 > 
s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[20:23], 0x14 ; C2021514 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v2, s4 ; 7E045404 > v_cmp_neq_f32_e64 vcc, 0, s4 ; D01A006A 00000880 > v_mov_b32_e32 v4, 0xc01cbef9 ; 7E0802FF C01CBEF9 > v_mov_b32_e32 v3, 0xc0cbda1b ; 7E0602FF C0CBDA1B > v_cndmask_b32_e32 v1, v1, v2 ; 00020501 > v_mov_b32_e32 v2, 0xc08d1805 ; 7E0402FF C08D1805 > v_fma_f32 v5, v2, v1, v0 ; D2960005 04020302 > s_and_b32 s0, s0, s19 ; 87001300 > v_fma_f32 v2, v4, v1, v0 ; D2960002 04020304 > v_fma_f32 v4, v3, v1, v0 ; D2960004 04020303 > v_mov_b32_e32 v3, 0xbf28543f ; 7E0602FF BF28543F > image_sample v[7:10], v[5:6], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030705 > v_mov_b32_e32 v14, 0x3dd4551f ; 7E1C02FF 3DD4551F > v_fma_f32 v11, v3, v1, v0 ; D296000B 04020303 > v_mov_b32_e32 v12, v6 ; 7E180306 > v_mov_b32_e32 v3, v6 ; 7E060306 > v_mov_b32_e32 v5, v6 ; 7E0A0306 > image_sample v[15:18], v[11:12], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030F0B > image_sample v[19:22], v[2:3], s[12:19], s[0:3] dmask:0xf ; F0800F00 00031302 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 v7, v14, v7 ; 100E0F0E > v_mul_f32_e32 v8, v14, v8 ; 1010110E > v_mul_f32_e32 v9, v14, v9 ; 1012130E > v_mul_f32_e32 v10, v14, v10 ; 1014150E > image_sample v[2:5], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030204 > v_mov_b32_e32 v11, 0x3d30274d ; 7E1602FF 3D30274D > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v2, v11, v7 ; D2960002 041E1702 > v_fma_f32 v3, v3, v11, v8 ; D2960003 04221703 > v_mov_b32_e32 v7, 0x3e39d5b5 ; 7E0E02FF 3E39D5B5 > v_fma_f32 v4, v4, v11, v9 ; D2960004 04261704 > v_fma_f32 v5, v5, v11, v10 ; D2960005 042A1705 > v_mov_b32_e32 v9, 0x3f28543f ; 7E1202FF 3F28543F > v_fma_f32 v2, v19, v7, v2 ; D2960002 040A0F13 > v_mov_b32_e32 v8, 0x3e2ff5e9 ; 7E1002FF 3E2FF5E9 > v_fma_f32 v3, v20, v7, v3 ; D2960003 040E0F14 > v_fma_f32 v4, v21, v7, v4 ; D2960004 04120F15 > v_fma_f32 v5, v22, v7, v5 ; D2960005 04160F16 > v_fma_f32 v9, v9, v1, v0 ; D2960009 04020309 > v_mov_b32_e32 v10, v6 ; 7E140306 > v_fma_f32 v2, v15, v8, v2 ; D2960002 040A110F > v_fma_f32 v3, v16, v8, v3 ; D2960003 040E1110 > v_fma_f32 v4, v17, v8, v4 ; D2960004 04121111 > v_fma_f32 v5, v18, v8, v5 ; D2960005 04161112 > image_sample v[15:18], v[9:10], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030F09 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v15, v8, v2 ; D2960002 040A110F > v_fma_f32 v3, v16, v8, v3 ; D2960003 040E1110 > v_fma_f32 v4, v17, v8, v4 ; D2960004 04121111 > v_fma_f32 v5, v18, v8, v5 ; D2960005 04161112 > v_mov_b32_e32 v8, 0x401cbef9 ; 7E1002FF 401CBEF9 > v_fma_f32 v8, v8, v1, v0 ; D2960008 04020308 > v_mov_b32_e32 v9, v6 ; 7E120306 > image_sample v[15:18], v[8:9], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030F08 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v15, v7, v2 ; D2960002 040A0F0F > v_fma_f32 v3, v16, v7, v3 ; D2960003 040E0F10 > v_fma_f32 v4, v17, v7, v4 ; D2960004 04120F11 > v_fma_f32 v5, v18, v7, v5 ; D2960005 04160F12 > v_mov_b32_e32 v7, 0x408d1805 ; 7E0E02FF 408D1805 > v_fma_f32 v7, v7, v1, v0 ; D2960007 04020307 > v_mov_b32_e32 v8, v6 ; 7E100306 > image_sample v[7:10], v[7:8], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030707 
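; annotation (not produced by the compiler; an interpretation of the listing):
; this pixel shader looks like one pass of a separable blur.  Eight
; image_sample taps are taken along the first texture coordinate, the second
; held fixed, at offsets of roughly +/-0.66, +/-2.45, +/-4.41 and +/-6.37
; (the literals 0xbf28543f, 0xc01cbef9, 0xc08d1805, 0xc0cbda1b and their
; positive counterparts), scaled by the reciprocal of a constant-buffer value,
; presumably 1/width.  The taps are accumulated with weights of about 0.172,
; 0.181, 0.104 and 0.043 (0x3e2ff5e9, 0x3e39d5b5, 0x3dd4551f, 0x3d30274d),
; each applied to a symmetric pair, so the eight contributions sum to ~1.0.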
> s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v2, v7, v14, v2 ; D2960002 040A1D07 > v_mov_b32_e32 v7, 0x40cbda1b ; 7E0E02FF 40CBDA1B > v_fma_f32 v0, v7, v1, v0 ; D2960000 04020307 > v_mov_b32_e32 v1, v6 ; 7E020306 > v_fma_f32 v3, v8, v14, v3 ; D2960003 040E1D08 > v_fma_f32 v4, v9, v14, v4 ; D2960004 04121D09 > image_sample v[6:9], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030600 > v_fma_f32 v5, v10, v14, v5 ; D2960005 04161D0A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v6, v11, v2 ; D2960000 040A1706 > v_fma_f32 v1, v7, v11, v3 ; D2960001 040E1707 > v_fma_f32 v2, v8, v11, v4 ; D2960002 04121708 > v_fma_f32 v3, v9, v11, v5 ; D2960003 04161709 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 24 >Code Size: 608 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v3, 0.5, 0.5 ; D2960000 03C1E103 > v_fma_f32 v1, v4, -0.5, 0.5 ; D2960001 03C1E304 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 76 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 0, 0, [m0] ; C8100002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s4, s[20:23], 0x15 ; C2021515 > v_interp_p2_f32 v4, [v4], v3, 0, 0, [m0] ; C8110003 > v_interp_p1_f32 v2, v2, 1, 0, [m0] ; C8080102 > v_bfrev_b32_e32 v0, 14 ; 7E00708E > v_interp_p2_f32 v2, [v2], v3, 1, 0, [m0] ; C8090103 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v5, s4 ; 7E0A5404 > v_cmp_neq_f32_e64 vcc, 0, s4 ; D01A006A 00000880 > v_mov_b32_e32 v6, 0xbf28543f ; 7E0C02FF BF28543F > s_and_b32 s0, s0, s19 ; 87001300 > v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 > v_mov_b32_e32 v5, 0xc08d1805 ; 7E0A02FF C08D1805 > v_mov_b32_e32 v3, 0xc01cbef9 ; 7E0602FF C01CBEF9 > v_fma_f32 v5, v5, v0, v2 ; D2960005 040A0105 > v_fma_f32 v6, v6, v0, v2 ; D2960006 040A0106 > v_mov_b32_e32 v1, 0xc0cbda1b ; 7E0202FF C0CBDA1B > image_sample v[7:10], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030704 > v_mov_b32_e32 v5, v6 ; 7E0A0306 > v_fma_f32 v3, v3, v0, v2 ; D2960003 040A0103 > image_sample v[14:17], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030E04 > v_mov_b32_e32 v5, v3 ; 7E0A0303 > v_fma_f32 v1, v1, v0, v2 ; D2960001 040A0101 > v_mov_b32_e32 v11, 0x3dd4551f ; 7E1602FF 3DD4551F > image_sample v[18:21], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00031204 > v_mov_b32_e32 v5, v1 ; 7E0A0301 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mul_f32_e32 
v8, v11, v8 ; 1010110B > image_sample v[22:25], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00031604 > v_mov_b32_e32 v3, 0x3d30274d ; 7E0602FF 3D30274D > v_mul_f32_e32 v7, v11, v7 ; 100E0F0B > v_mul_f32_e32 v9, v11, v9 ; 1012130B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v5, v23, v3, v8 ; D2960005 04220717 > v_mov_b32_e32 v8, 0x3e39d5b5 ; 7E1002FF 3E39D5B5 > v_mul_f32_e32 v10, v11, v10 ; 1014150B > v_fma_f32 v1, v22, v3, v7 ; D2960001 041E0716 > v_fma_f32 v6, v24, v3, v9 ; D2960006 04260718 > v_fma_f32 v7, v25, v3, v10 ; D2960007 042A0719 > v_mov_b32_e32 v9, 0x3f28543f ; 7E1202FF 3F28543F > v_fma_f32 v5, v19, v8, v5 ; D2960005 04161113 > v_mov_b32_e32 v10, 0x3e2ff5e9 ; 7E1402FF 3E2FF5E9 > v_fma_f32 v1, v18, v8, v1 ; D2960001 04061112 > v_fma_f32 v12, v15, v10, v5 ; D296000C 0416150F > v_fma_f32 v9, v9, v0, v2 ; D2960009 040A0109 > v_mov_b32_e32 v5, 0x401cbef9 ; 7E0A02FF 401CBEF9 > v_fma_f32 v6, v20, v8, v6 ; D2960006 041A1114 > v_fma_f32 v7, v21, v8, v7 ; D2960007 041E1115 > v_fma_f32 v1, v14, v10, v1 ; D2960001 0406150E > v_fma_f32 v14, v5, v0, v2 ; D296000E 040A0105 > v_mov_b32_e32 v5, v9 ; 7E0A0309 > v_fma_f32 v6, v16, v10, v6 ; D2960006 041A1510 > v_fma_f32 v7, v17, v10, v7 ; D2960007 041E1511 > image_sample v[15:18], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030F04 > v_mov_b32_e32 v5, v14 ; 7E0A030E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v15, v10, v1 ; D2960001 0406150F > v_fma_f32 v9, v16, v10, v12 ; D2960009 04321510 > v_fma_f32 v6, v17, v10, v6 ; D2960006 041A1511 > image_sample v[14:17], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030E04 > v_fma_f32 v7, v18, v10, v7 ; D2960007 041E1512 > v_mov_b32_e32 v5, 0x408d1805 ; 7E0A02FF 408D1805 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v14, v8, v1 ; D2960001 0406110E > v_fma_f32 v9, v15, v8, v9 ; D2960009 0426110F > v_fma_f32 v6, v16, v8, v6 ; D2960006 041A1110 > v_fma_f32 v7, v17, v8, v7 ; D2960007 041E1111 > v_mov_b32_e32 v8, 0x40cbda1b ; 7E1002FF 40CBDA1B > v_fma_f32 v5, v5, v0, v2 ; D2960005 040A0105 > v_fma_f32 v0, v8, v0, v2 ; D2960000 040A0108 > image_sample v[14:17], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030E04 > v_mov_b32_e32 v5, v0 ; 7E0A0300 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v1, v14, v11, v1 ; D2960001 0406170E > v_fma_f32 v2, v15, v11, v9 ; D2960002 0426170F > v_fma_f32 v6, v16, v11, v6 ; D2960006 041A1710 > v_fma_f32 v7, v17, v11, v7 ; D2960007 041E1711 > image_sample v[8:11], v[4:5], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030804 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v0, v8, v3, v1 ; D2960000 04060708 > v_fma_f32 v1, v9, v3, v2 ; D2960001 040A0709 > v_fma_f32 v2, v10, v3, v6 ; D2960002 041A070A > v_fma_f32 v3, v11, v3, v7 ; D2960003 041E070B >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 28 >Code Size: 604 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_bfrev_b32_e32 v1, 14 ; 7E02708E > v_mov_b32_e32 v7, 0x3fab6c59 ; 7E0E02FF 3FAB6C59 > v_mov_b32_e32 v8, 0x3ee3ac0c ; 7E1002FF 3EE3AC0C > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw 
v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 > s_buffer_load_dword s0, s[0:3], 0x25 ; C2000125 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mov_b32_e32 v5, 0xbee3ac0c ; 7E0A02FF BEE3AC0C > v_mov_b32_e32 v6, 0xbfab6c59 ; 7E0C02FF BFAB6C59 > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_rcp_f32_e32 v9, s4 ; 7E125404 > v_rcp_f32_e32 v10, s0 ; 7E145400 > v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 > v_cmp_eq_f32_e64 s[0:1], 0, s0 ; D0040000 00000080 > v_cndmask_b32_e32 v9, v9, v1 ; 00120309 > v_cndmask_b32_e64 v1, v10, v1, s[0:1] ; D2000001 0002030A > v_fma_f32 v10, v3, 0.5, 0.5 ; D296000A 03C1E103 > v_fma_f32 v11, v4, -0.5, 0.5 ; D296000B 03C1E304 > v_add_f32_e32 v12, 0, v3 ; 06180680 > v_sub_f32_e32 v13, 0, v4 ; 081A0880 > v_fma_f32 v14, v9, v5, v10 ; D296000E 042A0B09 > v_fma_f32 v15, v1, v6, v11 ; D296000F 042E0D01 > v_fma_f32 v16, v9, v7, v10 ; D2960010 042A0F09 > v_fma_f32 v5, v1, v5, v11 ; D2960005 042E0B01 > exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A > v_fma_f32 v17, v1, v8, v11 ; D2960011 042E1101 > v_fma_f32 v6, v9, v6, v10 ; D2960006 042A0D09 > v_fma_f32 v8, v9, v8, v10 ; D2960008 042A1109 > v_fma_f32 v1, v1, v7, v11 ; D2960001 042E0F01 > exp 15, 33, 0, 0, 0, v14, v15, v16, v5 ; F800021F 05100F0E > exp 15, 34, 0, 0, 0, v6, v17, v8, v1 ; F800022F 01081106 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 1.0 ; 7E0202F2 > exp 15, 12, 0, 1, 0, v3, v4, v0, v1 ; F80008CF 01000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 20 >Code Size: 252 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 0, 0, [m0] ; C8100002 > v_interp_p2_f32 v4, [v4], v3, 0, 0, [m0] ; C8110003 > v_interp_p1_f32 v5, v2, 1, 0, [m0] ; C8140102 > v_interp_p2_f32 v5, [v5], v3, 1, 0, [m0] ; C8150103 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v7, v2, 2, 1, [m0] ; C81C0602 > s_load_dwordx8 s[20:27], s[4:5], 0x0 ; C0CA0500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v7, [v7], v3, 2, 1, [m0] ; C81D0603 > v_interp_p1_f32 v8, v2, 3, 1, [m0] ; C8200702 > v_interp_p2_f32 v8, [v8], v3, 3, 1, [m0] ; C8210703 > v_interp_p1_f32 v9, v2, 0, 2, [m0] ; C8240802 > v_interp_p2_f32 v9, [v9], v3, 0, 2, [m0] ; C8250803 > v_interp_p1_f32 v10, v2, 1, 2, [m0] ; C8280902 > v_interp_p2_f32 v10, [v10], v3, 1, 2, [m0] ; C8290903 > v_interp_p1_f32 v11, v2, 2, 2, [m0] ; C82C0A02 > v_interp_p2_f32 v11, [v11], v3, 2, 2, [m0] ; C82D0A03 > v_interp_p1_f32 v12, v2, 3, 2, [m0] ; C8300B02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s27 ; 87001B00 > v_interp_p2_f32 v12, [v12], v3, 3, 2, [m0] ; C8310B03 > image_sample v2, v[4:5], s[20:27], s[0:3] dmask:0x1 ; F0800100 00050204 > s_buffer_load_dword s3, s[12:15], 0x1c ; C2018D1C > s_buffer_load_dword s2, s[12:15], 0x1d ; C2010D1D > s_buffer_load_dword s1, s[12:15], 0x1e ; C2008D1E > s_buffer_load_dword s6, s[12:15], 0x28 ; C2030D28 > s_buffer_load_dword s0, s[12:15], 0x22 ; C2000D22 > s_load_dwordx4 s[16:19], s[4:5], 0x3c ; C088053C > s_load_dwordx4 s[20:23], s[4:5], 0x4c ; C08A054C > s_load_dwordx4 s[12:15], s[4:5], 
0x5c ; C086055C > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_subrev_f32_e32 v2, s3, v2 ; 0A040403 > v_mul_f32_e32 v2, s2, v2 ; 10040402 > v_add_f32_e64 v3, 0, v2 clamp ; D2060803 00020480 > v_mul_f32_e32 v2, s1, v3 ; 10040601 > s_cmp_eq_i32 s6, 0 ; BF008006 > s_cbranch_scc1 BB0_3 ; BF850000 > s_load_dwordx8 s[24:31], s[4:5], 0x10 ; C0CC0510 > s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C > v_mov_b32_e32 v6, 0 ; 7E0C0280 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s32, s32, s31 ; 87201F20 > image_sample_l v6, v[4:7], s[24:31], s[32:35] dmask:0x1 ; F0900100 01060604 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_cmp_gt_f32_e32 vcc, 1.0, v6 ; 7C080CF2 > s_and_saveexec_b64 s[6:7], vcc ; BE86246A > s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E > s_cbranch_execz BB0_2 ; BF880000 > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > s_load_dwordx4 s[32:35], s[4:5], 0x2c ; C090052C > v_mov_b32_e32 v16, 0 ; 7E200280 > v_mov_b32_e32 v14, v4 ; 7E1C0304 > v_mov_b32_e32 v15, v5 ; 7E1E0305 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s32, s32, s31 ; 87201F20 > image_sample_l v2, v[14:17], s[24:31], s[32:35] dmask:0x1 ; F0900100 0106020E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v2, s3, v2 ; 0A040403 > v_mul_f32_e32 v2, s2, v2 ; 10040402 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_mul_f32_e32 v2, s1, v2 ; 10040401 > v_fma_f32 v3, v3, s1, -v2 ; D2960003 84080303 > v_fma_f32 v2, v6, v3, v2 ; D2960002 040A0706 > s_or_b64 exec, exec, s[6:7] ; 88FE067E > s_load_dwordx8 s[24:31], s[4:5], 0x30 ; C0CC0530 > s_load_dwordx8 s[32:39], s[4:5], 0x40 ; C0D00540 > v_mov_b32_e32 v3, 0x3e65afb0 ; 7E0602FF 3E65AFB0 > v_mov_b32_e32 v6, 0xbf2aaaab ; 7E0C02FF BF2AAAAB > v_mov_b32_e32 v14, 0x3f2aaaab ; 7E1C02FF 3F2AAAAB > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s16, s16, s31 ; 87101F10 > image_sample v[18:20], v[0:1], s[24:31], s[16:19] dmask:0x7 ; F0800700 00861200 > image_sample v[21:23], v[7:8], s[24:31], s[16:19] dmask:0x7 ; F0800700 00861507 > image_sample v[7:9], v[9:10], s[24:31], s[16:19] dmask:0x7 ; F0800700 00860709 > image_sample v[10:12], v[11:12], s[24:31], s[16:19] dmask:0x7 ; F0800700 00860A0B > s_load_dwordx8 s[24:31], s[4:5], 0x50 ; C0CC0550 > s_and_b32 s20, s20, s39 ; 87142714 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_add_f32_e32 v0, v21, v18 ; 06002515 > v_add_f32_e32 v1, v22, v19 ; 06022716 > image_sample v[24:26], v[4:5], s[32:39], s[20:23] dmask:0x7 ; F0800700 00A81804 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s31 ; 870C1F0C > v_add_f32_e32 v18, v23, v20 ; 06242917 > image_sample v[19:22], v[4:5], s[24:31], s[12:15] dmask:0xf ; F0800F00 00661304 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_add_f32_e32 v0, v0, v7 ; 06000F00 > v_add_f32_e32 v1, v1, v8 ; 06021101 > v_add_f32_e32 v4, v18, v9 ; 06081312 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, s0, v22 ; 100A2C00 > v_add_f32_e32 v0, v0, v10 ; 06001500 > v_add_f32_e32 v1, v1, v11 ; 06021701 > v_add_f32_e32 v4, v4, v12 ; 06081904 > v_max_f32_e32 v2, v5, v2 ; 20040505 > v_mov_b32_e32 v15, 0xbf19999a ; 7E1E02FF BF19999A > v_mov_b32_e32 v16, 0x3ecccccd ; 7E2002FF 3ECCCCCD > v_mov_b32_e32 v17, 0xbecccccd ; 7E2202FF BECCCCCD > v_mov_b32_e32 v7, 0x3dcccccd ; 7E0E02FF 3DCCCCCD > v_fma_f32 v5, v14, v2, 0 ; D2960005 0202050E > v_mul_f32_e32 v0, v3, v0 ; 10000103 > v_mul_f32_e32 v1, v3, v1 ; 10020303 > v_mul_f32_e32 v4, v3, v4 ; 10080903 > v_fma_f32 v3, v6, v2, 1.0 ; D2960003 03CA0506 > v_fma_f32 v6, v2, v16, v15 ; D2960006 043E2102 > v_fma_f32 v2, v2, v7, v17 ; D2960002 04460F02 > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > 
v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 > v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 > v_subrev_f32_e32 v5, v6, v5 ; 0A0A0B06 > v_subrev_f32_e32 v6, v2, v6 ; 0A0C0D02 > v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 > v_mov_b32_e32 v7, 0x3dd281fe ; 7E0E02FF 3DD281FE > v_fma_f32 v3, v7, v5, v3 ; D2960003 040E0B07 > v_mul_f32_e32 v7, v6, v24 ; 100E3106 > v_mul_f32_e32 v8, v6, v25 ; 10103306 > v_mul_f32_e32 v6, v6, v26 ; 100C3506 > v_fma_f32 v0, v0, v5, v7 ; D2960000 041E0B00 > v_fma_f32 v1, v1, v5, v8 ; D2960001 04220B01 > v_fma_f32 v4, v4, v5, v6 ; D2960004 041A0B04 > v_fma_f32 v0, v19, v2, v0 ; D2960000 04020513 > v_fma_f32 v1, v20, v2, v1 ; D2960001 04060514 > v_fma_f32 v2, v21, v2, v4 ; D2960002 04120515 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 28 >Code Size: 680 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[9:12], v5, s[8:11], 0 idxen ; E00C2000 80020905 > s_buffer_load_dword s17, s[0:3], 0x59 ; C2088159 > s_buffer_load_dword s5, s[0:3], 0x4d ; C202814D > s_buffer_load_dword s9, s[0:3], 0x51 ; C2048151 > s_buffer_load_dword s16, s[0:3], 0x58 ; C2080158 > s_buffer_load_dword s13, s[0:3], 0x55 ; C2068155 > s_buffer_load_dword s4, s[0:3], 0x4c ; C202014C > s_buffer_load_dword s8, s[0:3], 0x50 ; C2040150 > s_buffer_load_dword s18, s[0:3], 0x5a ; C209015A > s_buffer_load_dword s6, s[0:3], 0x4e ; C203014E > s_buffer_load_dword s10, s[0:3], 0x52 ; C2050152 > s_buffer_load_dword s12, s[0:3], 0x54 ; C2060154 > s_buffer_load_dword s19, s[0:3], 0x5b ; C209815B > s_buffer_load_dword s7, s[0:3], 0x4f ; C203814F > s_buffer_load_dword s11, s[0:3], 0x53 ; C2058153 > s_buffer_load_dword s14, s[0:3], 0x56 ; C2070156 > s_buffer_load_dword s15, s[0:3], 0x57 ; C2078157 > s_buffer_load_dword s20, s[0:3], 0x90 ; C20A0190 > s_buffer_load_dword s0, s[0:3], 0x91 ; C2000191 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mul_f32_e32 v4, s17, v7 ; 10080E11 > v_mul_f32_e32 v0, s5, v7 ; 10000E05 > v_mul_f32_e32 v1, s9, v7 ; 10020E09 > v_mac_f32_e32 v4, s16, v6 ; 3E080C10 > v_mul_f32_e32 v3, s13, v7 ; 10060E0D > v_mac_f32_e32 v0, s4, v6 ; 3E000C04 > v_mac_f32_e32 v1, s8, v6 ; 3E020C08 > v_mac_f32_e32 v4, s18, v8 ; 3E081012 > v_mac_f32_e32 v3, s12, v6 ; 3E060C0C > v_mac_f32_e32 v0, s6, v8 ; 3E001006 > v_mac_f32_e32 v1, s10, v8 ; 3E02100A > v_add_f32_e32 v4, s19, v4 ; 06080813 > v_mac_f32_e32 v3, s14, v8 ; 3E06100E > v_add_f32_e32 v0, s7, v0 ; 06000007 > v_mul_f32_e32 v5, s20, v4 ; 100A0814 > v_add_f32_e32 v1, s11, v1 ; 0602020B > v_mul_f32_e32 v6, s0, v4 ; 100C0800 > v_add_f32_e32 v3, s15, v3 ; 0606060F > v_fma_f32 v5, v0, s20, v5 ; D2960005 04142900 > v_fma_f32 v6, v1, -s0, v6 ; D2960006 44180101 > exp 15, 32, 0, 0, 0, v5, v6, v3, v4 ; F800020F 04030605 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 33, 0, 0, 0, v9, v10, v11, v12 ; 
F800021F 0C0B0A09 > exp 15, 12, 0, 1, 0, v0, v1, v3, v4 ; F80008CF 04030100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 244 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_rcp_f32_e32 v5, v4 ; 7E0A5504 > v_cmp_lt_f32_e64 s[0:1], 0, v0 ; D0020000 00020080 > v_interp_p1_f32 v17, v2, 3, 1, [m0] ; C8440702 > v_bfrev_b32_e32 v2, 14 ; 7E04708E > v_mul_f32_e32 v9, v5, v0 ; 10120105 > v_cndmask_b32_e64 v0, v0, 1.0, s[0:1] ; D2000000 0001E500 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s0, s[12:15], 0x40 ; C2000D40 > v_interp_p2_f32 v17, [v17], v3, 3, 1, [m0] ; C8450703 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_bfrev_b32_e32 v3, 15 ; 7E06708F > v_mul_f32_e32 v5, v5, v1 ; 100A0305 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v4, s0 ; 7E080200 > v_cmp_le_f32_e64 s[0:1], 0, v0 ; D0060000 00020080 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_cndmask_b32_e64 v0, v3, v0, s[0:1] ; D2000000 00020103 > v_cmp_lt_f32_e64 s[0:1], 0, v1 ; D0020000 00020280 > v_cndmask_b32_e64 v1, v1, 1.0, s[0:1] ; D2000001 0001E501 > v_cmp_le_f32_e64 s[0:1], 0, v1 ; D0060000 00020280 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; D2000001 00020303 > v_cndmask_b32_e32 v1, v5, v1 ; 00020305 > v_cndmask_b32_e32 v0, v9, v0 ; 00000109 > v_fma_f32 v5, v0, 2.0, -1.0 ; D2960005 03CDE900 > v_fma_f32 v3, v1, 2.0, -1.0 ; D2960003 03CDE901 > v_max_f32_e64 v9, |v5|, |v3| ; D2200309 00020705 > v_rcp_f32_e32 v21, v9 ; 7E2A5509 > s_buffer_load_dword s23, s[12:15], 0x49 ; C20B8D49 > s_buffer_load_dword s6, s[12:15], 0x3c ; C2030D3C > s_buffer_load_dword s7, s[12:15], 0x3d ; C2038D3D > s_buffer_load_dword s8, s[12:15], 0x3e ; C2040D3E > s_buffer_load_dword s9, s[12:15], 0x3f ; C2048D3F > s_buffer_load_dword s11, s[12:15], 0x41 ; C2058D41 > s_buffer_load_dword s16, s[12:15], 0x42 ; C2080D42 > s_buffer_load_dword s17, s[12:15], 0x43 ; C2088D43 > s_buffer_load_dword s18, s[12:15], 0x44 ; C2090D44 > s_buffer_load_dword s19, s[12:15], 0x45 ; C2098D45 > s_buffer_load_dword s20, s[12:15], 0x46 ; C20A0D46 > s_buffer_load_dword s21, s[12:15], 0x47 ; C20A8D47 > s_buffer_load_dword s22, s[12:15], 0x48 ; C20B0D48 > s_buffer_load_dword s24, s[12:15], 0x4a ; C20C0D4A > s_buffer_load_dword s12, s[12:15], 0x4b ; C2060D4B > v_mul_f32_e32 v26, v5, v5 ; 10340B05 > v_mac_f32_e32 v26, v3, v3 ; 3E340703 > v_cmp_neq_f32_e32 vcc, 0, v9 ; 7C1A1280 > v_sqrt_f32_e32 v26, v26 ; 7E34671A > v_cndmask_b32_e32 v9, v2, v21 ; 00122B02 > v_min_f32_e64 v22, |v5|, |v3| ; D21E0316 00020705 > v_add_f32_e32 v26, v26, v26 ; 0634351A > v_mul_f32_e32 v9, v22, v9 ; 10121316 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_fma_f32 v21, v4, s12, v26 ; D2960015 04681904 > v_mov_b32_e32 v6, 0xbdae5a36 ; 7E0C02FF BDAE5A36 > v_mov_b32_e32 v7, 0x3caaae5f ; 7E0E02FF 3CAAAE5F > v_mul_f32_e32 v4, v9, v9 ; 10081309 > v_fma_f32 v6, v4, v7, v6 ; D2960006 041A0F04 > v_mov_b32_e32 v8, 0x3e3876e2 ; 7E1002FF 3E3876E2 > v_mov_b32_e32 v10, 0xbea91d04 ; 7E1402FF BEA91D04 > 
v_fma_f32 v6, v6, v4, v8 ; D2960006 04220906 > v_mov_b32_e32 v11, 0x3f7ff738 ; 7E1602FF 3F7FF738 > v_fma_f32 v6, v6, v4, v10 ; D2960006 042A0906 > v_fma_f32 v4, v6, v4, v11 ; D2960004 042E0906 > v_mov_b32_e32 v12, 0x3fc90fdb ; 7E1802FF 3FC90FDB > v_mul_f32_e32 v6, v9, v4 ; 100C0909 > v_fma_f32 v6, -2.0, v6, v12 ; D2960006 04320CF5 > v_cmp_lt_f32_e64 vcc, |v5|, |v3| ; D002036A 00020705 > v_cndmask_b32_e32 v6, 0, v6 ; 000C0C80 > v_mov_b32_e32 v16, v13 ; 7E20030D > v_mov_b32_e32 v13, 0xc0490fdb ; 7E1A02FF C0490FDB > v_cmp_lt_f32_e64 vcc, v5, -v5 ; D002006A 40020B05 > v_min_f32_e32 v23, v3, v5 ; 1E2E0B03 > v_max_f32_e32 v25, v3, v5 ; 20320B03 > v_cndmask_b32_e32 v5, 0, v13 ; 000A1A80 > v_fma_f32 v4, v9, v4, v6 ; D2960004 041A0909 > v_add_f32_e32 v4, v4, v5 ; 06080B04 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[28:35], s[4:5], 0x0 ; C0CE0500 > v_cmp_lt_f32_e64 vcc, v23, -v23 ; D002006A 40022F17 > v_xor_b32_e32 v5, 0x80000000, v4 ; 3A0A08FF 80000000 > v_cndmask_b32_e32 v5, v4, v5 ; 000A0B04 > v_cmp_ge_f32_e64 vcc, v25, -v25 ; D00C006A 40023319 > v_cndmask_b32_e32 v4, v4, v5 ; 00080B04 > v_mov_b32_e32 v5, s11 ; 7E0A020B > v_mul_f32_e32 v4, 0x3e22fabe, v4 ; 100808FF 3E22FABE > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s35 ; 87002300 > v_fma_f32 v22, v5, s12, v4 ; D2960016 04101905 > image_sample v[5:6], v[21:22], s[28:35], s[0:3] dmask:0x3 ; F0800300 00070515 > s_load_dwordx8 s[36:43], s[4:5], 0x10 ; C0D20510 > s_load_dwordx4 s[0:3], s[4:5], 0x1c ; C080051C > v_mov_b32_e32 v7, s24 ; 7E0E0218 > v_mov_b32_e32 v27, s23 ; 7E360217 > v_fma_f32 v4, v7, s12, v4 ; D2960004 04101907 > v_fma_f32 v3, v27, s12, v26 ; D2960003 0468191B > v_mul_f32_e32 v7, s21, v3 ; 100E0615 > v_mul_f32_e32 v8, s22, v4 ; 10100816 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s43 ; 87002B00 > image_sample v[3:4], v[7:8], s[36:43], s[0:3] dmask:0xa ; F0800A00 00090307 > v_mov_b32_e32 v15, 0x3ea8f5c3 ; 7E1E02FF 3EA8F5C3 > v_add_f32_e32 v8, -0.5, v1 ; 061002F1 > v_add_f32_e32 v7, -0.5, v0 ; 060E00F1 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mul_f32_e32 v6, s9, v6 ; 100C0C09 > v_mul_f32_e32 v8, v15, v8 ; 1010110F > v_mul_f32_e32 v7, v15, v7 ; 100E0F0F > v_mul_f32_e32 v5, s9, v5 ; 100A0A09 > v_fma_f32 v6, v6, v8, v1 ; D2960006 04061106 > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C > v_fma_f32 v5, v5, v7, v0 ; D2960005 04020F05 > v_fma_f32 v9, v6, 2.0, -1.0 ; D2960009 03CDE906 > v_mad_f32 v12, -s18, 0.5, 0.5 ; D282000C 23C1E012 > v_fma_f32 v0, v0, s18, v12 ; D2960000 04302500 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v3, v3, 2.0, -1.0 ; D2960003 03CDE903 > v_mad_f32 v12, -s19, 0.5, 0.5 ; D282000C 23C1E013 > v_mad_f32 v7, -s17, 0.5, 0.5 ; D2820007 23C1E011 > v_fma_f32 v8, v5, 2.0, -1.0 ; D2960008 03CDE905 > v_madak_f32_e32 v9, v9, v9, 0xbe800000 ; 42121309 BE800000 > v_mac_f32_e32 v9, v8, v8 ; 3E121108 > v_fma_f32 v8, v5, s17, v7 ; D2960008 041C2305 > v_fma_f32 v0, v3, s20, v0 ; D2960000 04002903 > v_fma_f32 v7, v6, s17, v7 ; D2960007 041C2306 > v_fma_f32 v1, v1, s19, v12 ; D2960001 04302701 > v_fma_f32 v4, v4, 2.0, -1.0 ; D2960004 03CDE904 > v_rcp_f32_e32 v3, s7 ; 7E065407 > v_fma_f32 v1, v4, s20, v1 ; D2960001 04042904 > v_subrev_f32_e32 v8, v5, v8 ; 0A101105 > v_mul_f32_e32 v9, v9, v9 ; 10121309 > v_subrev_f32_e32 v7, v6, v7 ; 0A0E0F06 > s_load_dwordx4 s[12:15], s[4:5], 0x3c ; C086053C > s_load_dwordx8 s[32:39], s[4:5], 0x30 ; C0D00530 > v_fma_f32 v1, v1, 2.0, -1.0 ; D2960001 03CDE901 > v_fma_f32 v10, v9, v8, v5 ; D296000A 04161109 > v_fma_f32 
v11, v9, v7, v6 ; D296000B 041A0F09 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s31 ; 87001F00 > image_sample v[5:7], v[10:11], s[24:31], s[0:3] dmask:0x7 ; F0800700 0006050A > s_load_dwordx4 s[40:43], s[4:5], 0x4c ; C094054C > s_load_dwordx8 s[44:51], s[4:5], 0x40 ; C0D60540 > s_load_dwordx8 s[24:31], s[4:5], 0x50 ; C0CC0550 > s_load_dwordx4 s[0:3], s[4:5], 0x5c ; C080055C > v_fma_f32 v0, v0, 2.0, -1.0 ; D2960000 03CDE900 > v_mad_f32 v1, v1, v1, -s6 ; D2820001 801A0301 > v_cmp_neq_f32_e64 vcc, 0, s7 ; D01A006A 00000E80 > v_mac_f32_e32 v1, v0, v0 ; 3E020100 > v_cndmask_b32_e32 v0, v2, v3 ; 00000702 > v_mul_f32_e32 v0, v1, v0 ; 10000101 > s_and_b32 s12, s12, s39 ; 870C270C > v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 > v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 > v_fma_f32 v1, v0, -2.0, v14 ; D2960001 0439EB00 > v_mul_f32_e32 v0, v0, v0 ; 10000100 > v_mov_b32_e32 v19, 0x3d800000 ; 7E2602FF 3D800000 > v_mov_b32_e32 v20, 0x3f600000 ; 7E2802FF 3F600000 > image_sample v8, v[10:11], s[32:39], s[12:15] dmask:0x8 ; F0800800 0068080A > v_mul_f32_e32 v0, v1, v0 ; 10000101 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v1, 0x437f0000, v8 ; 100210FF 437F0000 > v_mov_b32_e32 v24, 0 ; 7E300280 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s40, s40, s51 ; 87283328 > v_fma_f32 v21, v5, v20, v19 ; D2960015 044E2905 > v_fma_f32 v22, v6, v20, v19 ; D2960016 044E2906 > v_fma_f32 v23, v7, v20, v19 ; D2960017 044E2907 > s_and_b32 s0, s0, s31 ; 87001F00 > image_sample_l v[9:11], v[21:24], s[44:51], s[40:43] dmask:0x7 ; F0900700 014B0915 > image_sample_l v[19:21], v[21:24], s[24:31], s[0:3] dmask:0x7 ; F0900700 00061315 > v_mov_b32_e32 v18, 0x42c60000 ; 7E2402FF 42C60000 > v_rndne_f32_e32 v1, v1 ; 7E024701 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v2, v20, v10 ; 0A041514 > v_cmp_lt_f32_e32 vcc, v18, v1 ; 7C020312 > v_subrev_f32_e32 v1, v19, v9 ; 0A021313 > v_subrev_f32_e32 v3, v21, v11 ; 0A061715 > v_fma_f32 v1, v0, v1, v19 ; D2960001 044E0300 > v_fma_f32 v2, v0, v2, v20 ; D2960002 04520500 > v_fma_f32 v0, v0, v3, v21 ; D2960000 04560700 > v_subrev_f32_e32 v3, v7, v0 ; 0A060107 > v_add_f32_e64 v0, 0, s16 clamp ; D2060800 00002080 > v_add_f32_e64 v4, 0, s8 clamp ; D2060804 00001080 > v_mul_f32_e32 v4, v0, v4 ; 10080900 > v_subrev_f32_e32 v1, v5, v1 ; 0A020305 > v_cndmask_b32_e32 v4, v0, v4 ; 00080900 > v_subrev_f32_e32 v2, v6, v2 ; 0A040506 > v_fma_f32 v0, v4, v1, v5 ; D2960000 04160304 > v_fma_f32 v1, v4, v2, v6 ; D2960001 041A0504 > v_fma_f32 v2, v4, v3, v7 ; D2960002 041E0704 > v_mov_b32_e32 v4, 0 ; 7E080280 > v_mov_b32_e32 v5, 0 ; 7E0A0280 > v_mov_b32_e32 v6, 0 ; 7E0C0280 > v_mov_b32_e32 v7, 0 ; 7E0E0280 > v_mov_b32_e32 v8, 0 ; 7E100280 > v_mov_b32_e32 v9, 0 ; 7E120280 > v_mov_b32_e32 v10, 0 ; 7E140280 > v_mov_b32_e32 v11, 0 ; 7E160280 > v_mov_b32_e32 v12, 0 ; 7E180280 > v_mov_b32_e32 v13, 0 ; 7E1A0280 > v_mov_b32_e32 v14, 0 ; 7E1C0280 > v_mov_b32_e32 v15, 0 ; 7E1E0280 > v_mov_b32_e32 v3, v17 ; 7E060311 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 > v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 > exp 15, 1, 1, 1, 1, v0, v1, v0, v0 ; F8001C1F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 28 >Code Size: 1216 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max 
Waves: 9 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 > s_buffer_load_dword s0, s[0:3], 0x1a ; C200011A > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_fma_f32 v0, v3, s4, s4 ; D2960000 00100903 > v_fma_f32 v1, v4, -s0, s0 ; D2960001 40000104 > exp 15, 32, 0, 0, 0, v0, v1, v0, v0 ; F800020F 00000100 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v3, v4, v1, v0 ; F80008CF 00010403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 88 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C > s_load_dwordx8 s[32:39], s[4:5], 0x0 ; C0D00500 > s_mov_b32 m0, s11 ; BEFC030B > s_load_dwordx4 s[16:19], s[4:5], 0x1c ; C088051C > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 > s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 > s_load_dwordx8 s[40:47], s[4:5], 0x10 ; C0D40510 > s_load_dwordx4 s[20:23], s[4:5], 0x2c ; C08A052C > s_load_dwordx8 s[24:31], s[4:5], 0x20 ; C0CC0520 > s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 > s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 > v_mov_b32_e32 v0, v3 ; 7E000303 > v_interp_p1_f32 v4, v2, 0, 0, [m0] ; C8100002 > v_interp_p2_f32 v4, [v4], v0, 0, 0, [m0] ; C8110000 > v_interp_p1_f32 v5, v2, 1, 0, [m0] ; C8140102 > v_interp_p2_f32 v5, [v5], v0, 1, 0, [m0] ; C8150100 > s_and_b32 s12, s12, s39 ; 870C270C > s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 > v_mov_b32_e32 v9, 0 ; 7E120280 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s7 ; 7E000207 > image_sample v[10:12], v[4:5], s[32:39], s[12:15] dmask:0x7 ; F0800700 00680A04 > v_mov_b32_e32 v1, s9 ; 7E020209 > v_mov_b32_e32 v17, v9 ; 7E220309 > s_and_b32 s20, s20, s31 ; 87141F14 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v6, s6, v10, v0 ; D2960006 04021406 > v_fma_f32 v7, s6, v11, v0 ; D2960007 04021606 > v_fma_f32 v8, s6, v12, v0 ; D2960008 04021806 > s_and_b32 s16, s16, s47 ; 87102F10 > v_fma_f32 v14, s8, v10, v1 ; D296000E 04061408 > v_fma_f32 v15, s8, v11, v1 ; D296000F 04061608 > v_fma_f32 v16, s8, v12, v1 ; D2960010 04061808 > image_sample_l v[4:6], v[6:9], s[40:47], s[16:19] dmask:0x7 ; F0900700 008A0406 > image_sample_l v[0:2], v[14:17], s[24:31], s[20:23] dmask:0x7 ; F0900700 00A6000E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v0, v4, v0 ; 0A000104 > v_subrev_f32_e32 v1, v5, v1 ; 0A020305 > v_subrev_f32_e32 v2, v6, v2 ; 0A040506 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 > v_fma_f32 v0, s0, v0, v4 ; D2960000 04120000 > v_fma_f32 v1, s0, v1, v5 ; D2960001 04160200 > v_fma_f32 v2, s0, v2, v6 ; D2960002 041A0400 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 20 >Code Size: 252 
bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL SV[0], INSTANCEID >DCL OUT[0], POSITION >DCL OUT[1], LAYER > 0: MOV OUT[0], IN[0] > 1: MOV OUT[1], SV[0] > 2: END >radeonsi: Compiling shader 394 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %13) > %17 = extractelement <4 x float> %16, i32 0 > %18 = extractelement <4 x float> %16, i32 1 > %19 = extractelement <4 x float> %16, i32 2 > %20 = extractelement <4 x float> %16, i32 3 > %21 = bitcast i32 %12 to float > %22 = bitcast i32 %12 to float > %23 = bitcast i32 %12 to float > %24 = bitcast i32 %12 to float > %25 = bitcast i32 %11 to float > %26 = insertvalue <{ float, float, float }> undef, float %25, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float %23, float %24) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %17, float %18, float %19, float %20) > call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float %21, float 0.000000e+00) > ret <{ float, float, float }> %26 >} > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], LAYER, CONSTANT >DCL SV[0], POSITION >DCL OUT[0], COLOR >DCL SAMP[0] >DCL CONST[0] >DCL TEMP[0] >IMM[0] UINT32 {0, 0, 0, 0} > 0: F2I TEMP[0].xy, SV[0].xyyy > 1: UADD TEMP[0].xy, TEMP[0].xyyy, CONST[0].xyyy > 2: UMAD TEMP[0].x, CONST[0].zzzz, TEMP[0].yyyy, TEMP[0].xxxx > 3: UMAD TEMP[0].x, CONST[0].wwww, IN[0].xxxx, TEMP[0].xxxx > 4: MOV TEMP[0].w, IMM[0].xxxx > 5: TXF OUT[0], TEMP[0], SAMP[0], BUFFER > 6: END >radeonsi: Compiling shader 395 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, 
float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) > %29 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to <2 x i128> addrspace(2)* > %30 = load <2 x i128>, <2 x i128> addrspace(2)* %29, align 32, !tbaa !0 > %31 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) > %32 = fptosi float %15 to i32 > %33 = fptosi float %16 to i32 > %34 = bitcast float %25 to i32 > %35 = add i32 %32, %34 > %36 = bitcast float %26 to i32 > %37 = add i32 %33, %36 > %38 = bitcast float %27 to i32 > %39 = mul i32 %38, %37 > %40 = add i32 %39, %35 > %41 = bitcast float %28 to i32 > %42 = bitcast float %31 to i32 > %43 = mul i32 %41, %42 > %44 = add i32 %43, %40 > %45 = extractelement <2 x i128> %30, i32 1 > %46 = bitcast i128 %45 to <16 x i8> > %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %44) > %48 = extractelement <4 x float> %47, i32 0 > %49 = extractelement <4 x float> %47, i32 1 > %50 = extractelement <4 x float> %47, i32 2 > %51 = extractelement <4 x float> %47, i32 3 > %52 = bitcast float %5 to i32 > %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %52, 10 > %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %48, 11 > %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %49, 12 > %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %50, 13 > %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %51, 14 > %58 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %58 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.constant(i32, i32, i32) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 > v_mov_b32_e32 v0, 0 ; 7E000280 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[4:7], v4, s[0:3], 0 idxen ; E00C2000 80000404 > exp 15, 32, 0, 0, 0, v3, v3, v3, v3 ; F800020F 03030303 > 
s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 12, 0, 0, 0, v4, v5, v6, v7 ; F80000CF 07060504 > exp 4, 13, 0, 1, 0, v0, v0, v3, v0 ; F80008D4 00030000 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 8 >Code Size: 60 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 > v_cvt_i32_f32_e32 v0, v13 ; 7E00110D > s_mov_b32 m0, s11 ; BEFC030B > v_cvt_i32_f32_e32 v2, v12 ; 7E04110C > v_interp_mov_f32 v1, P0, 0, 0, [m0] ; C8060002 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 > s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 > s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 > s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 > s_load_dwordx2 s[12:13], s[4:5], 0x4 ; C0460504 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_add_i32_e32 v0, vcc, s7, v0 ; 4A000007 > v_mul_lo_i32 v0, s8, v0 ; D2D60000 00020008 > s_load_dwordx2 s[14:15], s[4:5], 0x6 ; C0470506 > v_mul_lo_i32 v1, s0, v1 ; D2D60001 00020200 > v_add_i32_e32 v2, vcc, s6, v2 ; 4A040406 > v_add_i32_e32 v0, vcc, v0, v2 ; 4A000500 > v_mov_b32_e32 v13, v15 ; 7E1A030F > v_add_i32_e32 v0, vcc, v1, v0 ; 4A000101 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[0:3], v0, s[12:15], 0 idxen ; E00C2000 80030000 > s_waitcnt vmcnt(0) ; BF8C0F70 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd377 >SPI_PS_INPUT_ENA = 0x0320 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 17 >Code Size: 124 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 > s_buffer_load_dword s23, s[0:3], 0x11 ; C20B8111 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 > s_buffer_load_dword s22, s[0:3], 0x10 ; C20B0110 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 > s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s21, s[0:3], 0xf ; C20A810F > s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 > s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 > 
s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s4 ; 7E000204 > v_mov_b32_e32 v1, s5 ; 7E020205 > v_mov_b32_e32 v3, s6 ; 7E060206 > v_mov_b32_e32 v4, s7 ; 7E080207 > v_mov_b32_e32 v16, s11 ; 7E20020B > s_waitcnt vmcnt(1) ; BF8C0F71 > exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, s23, v11 ; 100A1617 > v_mul_f32_e32 v14, s20, v11 ; 101C1614 > v_mul_f32_e32 v15, s13, v11 ; 101E160D > v_mul_f32_e32 v11, s17, v11 ; 10161611 > v_mac_f32_e32 v5, s22, v10 ; 3E0A1416 > v_mac_f32_e32 v14, s19, v10 ; 3E1C1413 > v_mac_f32_e32 v15, s12, v10 ; 3E1E140C > v_mac_f32_e32 v11, s16, v10 ; 3E161410 > v_mac_f32_e32 v5, s24, v12 ; 3E0A1818 > v_mac_f32_e32 v14, s26, v12 ; 3E1C181A > v_mac_f32_e32 v15, s14, v12 ; 3E1E180E > v_mac_f32_e32 v11, s18, v12 ; 3E161812 > exp 15, 33, 0, 0, 0, v0, v1, v3, v4 ; F800021F 04030100 > v_mac_f32_e32 v5, s25, v13 ; 3E0A1A19 > v_mac_f32_e32 v14, s0, v13 ; 3E1C1A00 > v_mac_f32_e32 v15, s15, v13 ; 3E1E1A0F > v_mac_f32_e32 v11, s21, v13 ; 3E161A15 > v_mov_b32_e32 v10, s8 ; 7E140208 > v_mov_b32_e32 v12, s9 ; 7E180209 > v_mov_b32_e32 v13, s10 ; 7E1A020A > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > exp 15, 34, 0, 0, 0, v10, v12, v13, v16 ; F800022F 100D0C0A > exp 15, 35, 0, 0, 0, v5, v14, v0, v0 ; F800023F 00000E05 > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v15, v11, v1, v0 ; F80008CF 00010B0F > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 20 >Code Size: 304 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v5, v2, 2, 1, [m0] ; C8140602 > v_interp_p2_f32 v5, [v5], v3, 2, 1, [m0] ; C8150603 > v_interp_p1_f32 v6, v2, 3, 1, [m0] ; C8180702 > v_interp_p2_f32 v6, [v6], v3, 3, 1, [m0] ; C8190703 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v8, v2, 1, 2, [m0] ; C8200902 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v8, [v8], v3, 1, 2, [m0] ; C8210903 > v_interp_p1_f32 v9, v2, 2, 2, [m0] ; C8240A02 > v_interp_p2_f32 v9, [v9], v3, 2, 2, [m0] ; C8250A03 > v_interp_p1_f32 v10, v2, 3, 2, [m0] ; C8280B02 > v_interp_p2_f32 v10, [v10], v3, 3, 2, [m0] ; C8290B03 > v_interp_p1_f32 v11, v2, 0, 3, [m0] ; C82C0C02 > v_interp_p2_f32 v11, [v11], v3, 0, 3, [m0] ; C82D0C03 > v_interp_p1_f32 v12, v2, 1, 3, [m0] ; C8300D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v12, [v12], v3, 1, 3, [m0] ; C8310D03 > image_sample v[14:17], v[11:12], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030E0B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v3, v17, v10, v6 ; D2960003 041A1511 > v_fma_f32 v0, v14, v7, v0 ; D2960000 04020F0E > v_fma_f32 v1, v15, v8, v1 ; D2960001 0406110F > v_fma_f32 v2, v16, v9, v5 ; D2960002 04161310 > v_mul_f32_e32 v3, v4, v3 ; 10060704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, 
v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 20 >Code Size: 180 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 > s_buffer_load_dword s25, s[0:3], 0x15 ; C20C8115 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 > s_buffer_load_dword s24, s[0:3], 0x14 ; C20C0114 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 > s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s23, s[0:3], 0x13 ; C20B8113 > s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 > s_waitcnt vmcnt(1) ; BF8C0F71 > exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v0, s13, v11 ; 1000160D > v_mul_f32_e32 v1, s17, v11 ; 10021611 > v_mul_f32_e32 v3, s21, v11 ; 10061615 > v_mul_f32_e32 v4, s25, v11 ; 10081619 > v_mul_f32_e32 v5, s5, v11 ; 100A1605 > v_mul_f32_e32 v11, s9, v11 ; 10161609 > v_mac_f32_e32 v0, s12, v10 ; 3E00140C > v_mac_f32_e32 v1, s16, v10 ; 3E021410 > v_mac_f32_e32 v3, s20, v10 ; 3E061414 > v_mac_f32_e32 v4, s24, v10 ; 3E081418 > v_mac_f32_e32 v5, s4, v10 ; 3E0A1404 > v_mac_f32_e32 v11, s8, v10 ; 3E161408 > v_mac_f32_e32 v0, s14, v12 ; 3E00180E > v_mac_f32_e32 v1, s18, v12 ; 3E021812 > v_mac_f32_e32 v3, s22, v12 ; 3E061816 > v_mac_f32_e32 v4, s26, v12 ; 3E08181A > v_mac_f32_e32 v5, s6, v12 ; 3E0A1806 > v_mac_f32_e32 v11, s10, v12 ; 3E16180A > v_mac_f32_e32 v0, s15, v13 ; 3E001A0F > v_mac_f32_e32 v1, s19, v13 ; 3E021A13 > v_mac_f32_e32 v3, s23, v13 ; 3E061A17 > v_mac_f32_e32 v4, s0, v13 ; 3E081A00 > exp 15, 33, 0, 0, 0, v0, v1, v3, v4 ; F800021F 04030100 > v_mac_f32_e32 v5, s7, v13 ; 3E0A1A07 > v_mac_f32_e32 v11, s11, v13 ; 3E161A0B > v_mov_b32_e32 v10, 1.0 ; 7E1402F2 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 0 ; 7E000280 > exp 15, 12, 0, 1, 0, v5, v11, v0, v10 ; F80008CF 0A000B05 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 16 >Code Size: 284 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > 
s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 0, 0, [m0] ; C8100002 > v_interp_p2_f32 v4, [v4], v3, 0, 0, [m0] ; C8110003 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[24:31], s[4:5], 0x0 ; C0CC0500 > s_load_dwordx8 s[12:19], s[4:5], 0x10 ; C0C60510 > s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 3, 1, [m0] ; C81C0702 > v_interp_p2_f32 v7, [v7], v3, 3, 1, [m0] ; C81D0703 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s20, s20, s19 ; 87141314 > s_and_b32 s0, s0, s31 ; 87001F00 > image_sample v[6:9], v[6:7], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30606 > image_sample v[0:3], v[0:1], s[24:31], s[0:3] dmask:0xf ; F0800F00 00060000 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_subrev_f32_e32 v3, v9, v3 ; 0A060709 > v_subrev_f32_e32 v0, v6, v0 ; 0A000106 > v_subrev_f32_e32 v1, v7, v1 ; 0A020307 > v_subrev_f32_e32 v2, v8, v2 ; 0A040508 > v_fma_f32 v3, v4, v3, v9 ; D2960003 04260704 > v_fma_f32 v0, v4, v0, v6 ; D2960000 041A0104 > v_fma_f32 v1, v4, v1, v7 ; D2960001 041E0304 > v_fma_f32 v2, v4, v2, v8 ; D2960002 04220504 > v_mul_f32_e32 v3, v5, v3 ; 10060705 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 15 >Code Size: 176 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > v_mov_b32_e32 v0, 0x40066666 ; 7E0002FF 40066666 > v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F > v_mov_b32_e32 v3, 0x40466666 ; 7E0602FF 40466666 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > buffer_load_format_xyzw v[11:14], v5, s[12:15], 0 idxen ; E00C2000 80030B05 > v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD > s_load_dwordx4 s[4:7], s[10:11], 0x8 ; C0820B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v5, 0x3f8ccccd ; 7E0A02FF 3F8CCCCD > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[26:29], v6, s[4:7], 0 idxen ; E00C2000 80011A06 > s_waitcnt vmcnt(2) ; BF8C0F72 > exp 15, 32, 0, 0, 0, v7, v8, v9, v10 ; F800020F 0A090807 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_fma_f32 v0, v1, v13, v0 ; D2960000 04021B01 > v_fma_f32 v3, v1, v13, v3 ; D2960003 040E1B01 > v_fma_f32 v4, v1, v13, v4 ; D2960004 04121B01 > v_cvt_u32_f32_e32 v4, v4 ; 7E080F04 > v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 > v_cvt_u32_f32_e32 v3, v3 ; 7E060F03 > v_fma_f32 v1, v1, v13, v5 ; D2960001 04161B01 > v_cvt_u32_f32_e32 v1, v1 ; 7E020F01 > v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 > v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 > v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 > v_or_b32_e32 v21, 4, v0 ; 
382A0084 > v_or_b32_e32 v15, 4, v4 ; 381E0884 > v_or_b32_e32 v24, 4, v3 ; 38300684 > buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 > buffer_load_dword v6, v15, s[0:3], 0 offen ; E0301000 8000060F > buffer_load_dword v15, v24, s[0:3], 0 offen ; E0301000 80000F18 > v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 > v_or_b32_e32 v16, 8, v4 ; 38200888 > v_or_b32_e32 v22, 8, v0 ; 382C0088 > buffer_load_dword v5, v4, s[0:3], 0 offen ; E0301000 80000504 > v_or_b32_e32 v4, 12, v4 ; 3808088C > buffer_load_dword v20, v0, s[0:3], 0 offen ; E0301000 80001400 > v_or_b32_e32 v25, 8, v3 ; 38320688 > buffer_load_dword v23, v3, s[0:3], 0 offen ; E0301000 80001703 > v_or_b32_e32 v18, 4, v1 ; 38240284 > v_or_b32_e32 v19, 8, v1 ; 38260288 > v_or_b32_e32 v0, 12, v0 ; 3800008C > v_or_b32_e32 v3, 12, v3 ; 3806068C > buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 > v_or_b32_e32 v1, 12, v1 ; 3802028C > buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 > buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 > buffer_load_dword v24, v25, s[0:3], 0 offen ; E0301000 80001819 > buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 > buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 > buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 > buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 > buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 > buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 > exp 15, 33, 0, 0, 0, v11, v12, v13, v14 ; F800021F 0E0D0C0B > s_waitcnt ; BF8C0F7F > v_mul_f32_e32 v21, v21, v27 ; 102A3715 > s_waitcnt vmcnt(13) ; BF8C0F7D > v_mul_f32_e32 v15, v15, v27 ; 101E370F > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mac_f32_e32 v21, v20, v26 ; 3E2A3514 > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mac_f32_e32 v15, v23, v26 ; 3E1E3517 > s_waitcnt vmcnt(7) ; BF8C0F77 > v_mac_f32_e32 v21, v22, v28 ; 3E2A3916 > s_waitcnt vmcnt(6) ; BF8C0F76 > v_mac_f32_e32 v15, v24, v28 ; 3E1E3918 > s_waitcnt vmcnt(5) ; BF8C0F75 > exp 15, 34, 0, 0, 0, v5, v6, v16, v4 ; F800022F 04100605 > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v21, v0, v29 ; 3E2A3B00 > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v15, v3, v29 ; 3E1E3B03 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 35, 0, 0, 0, v17, v18, v19, v1 ; F800023F 01131211 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v21, v15, v1, v0 ; F80008CF 00010F15 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 32 >Code Size: 480 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 8 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 3, 1, [m0] ; C8180702 > v_interp_p2_f32 v6, [v6], v3, 3, 1, [m0] ; C8190703 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v8, v2, 1, 2, [m0] ; C8200902 > v_interp_p2_f32 v8, [v8], v3, 1, 2, [m0] ; C8210903 > v_interp_p1_f32 v9, v2, 2, 2, [m0] ; C8240A02 > 
v_interp_p2_f32 v9, [v9], v3, 2, 2, [m0] ; C8250A03 > v_interp_p1_f32 v10, v2, 3, 2, [m0] ; C8280B02 > v_interp_p2_f32 v10, [v10], v3, 3, 2, [m0] ; C8290B03 > v_interp_p1_f32 v11, v2, 0, 3, [m0] ; C82C0C02 > v_interp_p2_f32 v11, [v11], v3, 0, 3, [m0] ; C82D0C03 > v_interp_p1_f32 v12, v2, 1, 3, [m0] ; C8300D02 > v_interp_p2_f32 v12, [v12], v3, 1, 3, [m0] ; C8310D03 > v_interp_p1_f32 v14, v2, 2, 3, [m0] ; C8380E02 > v_interp_p2_f32 v14, [v14], v3, 2, 3, [m0] ; C8390E03 > v_interp_p1_f32 v15, v2, 3, 3, [m0] ; C83C0F02 > v_interp_p2_f32 v15, [v15], v3, 3, 3, [m0] ; C83D0F03 > v_fma_f32 v3, v5, v15, v10 ; D2960003 042A1F05 > v_fma_f32 v0, v0, v11, v7 ; D2960000 041E1700 > v_fma_f32 v1, v1, v12, v8 ; D2960001 04221901 > v_fma_f32 v2, v4, v14, v9 ; D2960002 04261D04 > v_mul_f32_e32 v3, v6, v3 ; 10060706 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 16 >VGPRS: 16 >Code Size: 164 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[8:11], s[10:11], 0x8 ; C0840B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > v_mov_b32_e32 v0, 1.0 ; 7E0002F2 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > buffer_load_format_xyzw v[11:14], v5, s[12:15], 0 idxen ; E00C2000 80030B05 > buffer_load_format_xyzw v[3:6], v6, s[8:11], 0 idxen ; E00C2000 80020306 > s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 > s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 > s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 > s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 > s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105 > s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 > s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 > s_buffer_load_dword s12, s[0:3], 0x4 ; C2060104 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > v_mov_b32_e32 v14, s10 ; 7E1C020A > s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 > s_buffer_load_dword s10, s[0:3], 0xd ; C205010D > v_mov_b32_e32 v1, s8 ; 7E020208 > v_mov_b32_e32 v13, s9 ; 7E1A0209 > s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 > v_mov_b32_e32 v15, s11 ; 7E1E020B > s_buffer_load_dword s9, s[0:3], 0xc ; C204810C > s_buffer_load_dword s8, s[0:3], 0xb ; C204010B > v_fma_f32 v1, s12, v7, v1 ; D2960001 04060E0C > v_fma_f32 v7, s16, v8, v13 ; D2960007 04361010 > v_fma_f32 v8, s4, v9, v14 ; D2960008 043A1204 > v_fma_f32 v9, s5, v10, v15 ; D2960009 043E1405 > s_buffer_load_dword s4, s[0:3], 0xa ; C202010A > s_buffer_load_dword s5, s[0:3], 0xe ; C202810E > s_buffer_load_dword s0, s[0:3], 0xf ; C200010F > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v10, s7, v4 ; 10140807 > v_mul_f32_e32 v4, s10, v4 ; 1008080A > v_mac_f32_e32 v10, s6, v3 ; 3E140606 > v_mac_f32_e32 v4, s9, v3 ; 3E080609 > v_mac_f32_e32 v10, s4, v5 ; 3E140A04 > v_mac_f32_e32 v4, s5, v5 ; 3E080A05 > exp 15, 32, 0, 0, 0, v11, v12, v0, v0 ; F800020F 00000C0B > exp 15, 33, 0, 0, 0, v1, v7, v8, v9 ; F800021F 09080701 > v_mac_f32_e32 v10, s8, v6 ; 3E140C08 > v_mac_f32_e32 v4, s0, v6 ; 
3E080C00 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v1, 0 ; 7E020280 > exp 15, 12, 0, 1, 0, v10, v4, v1, v0 ; F80008CF 0001040A > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 252 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v5, v2, 0, 0, [m0] ; C8140002 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v5, [v5], v3, 0, 0, [m0] ; C8150003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v4, v2, 2, 1, [m0] ; C8100602 > v_interp_p2_f32 v4, [v4], v3, 2, 1, [m0] ; C8110603 > v_interp_p1_f32 v2, v2, 3, 1, [m0] ; C8080702 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v2, [v2], v3, 3, 1, [m0] ; C8090703 > image_sample v3, v[5:6], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030305 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v2, v3 ; 10060702 > v_mov_b32_e32 v2, v4 ; 7E040304 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 112 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 > s_buffer_load_dword s12, s[0:3], 0xd ; C206010D > s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 > s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 > s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 > s_buffer_load_dword s11, s[0:3], 0xc ; C205810C > s_buffer_load_dword s9, s[0:3], 0xa ; C204810A > s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 > s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 > s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 > s_buffer_load_dword s10, s[0:3], 0xb ; C205010B > s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 > s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 > s_buffer_load_dword s0, s[0:3], 0xf ; C200010F > v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 > s_waitcnt vmcnt(1) ; BF8C0F71 > exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v0, s8, v11 ; 10001608 > v_mul_f32_e32 v1, s12, v11 ; 1002160C > v_mul_f32_e32 v3, s14, v11 ; 1006160E > v_mul_f32_e32 v4, s4, v11 ; 10081604 > v_mac_f32_e32 v0, s7, v10 ; 3E001407 > v_mac_f32_e32 v1, s11, v10 ; 3E02140B > 
v_mac_f32_e32 v3, s13, v10 ; 3E06140D > v_mac_f32_e32 v4, s17, v10 ; 3E081411 > v_mac_f32_e32 v0, s9, v12 ; 3E001809 > v_mac_f32_e32 v1, s18, v12 ; 3E021812 > v_mac_f32_e32 v3, s15, v12 ; 3E06180F > v_mac_f32_e32 v4, s5, v12 ; 3E081805 > v_mac_f32_e32 v0, s10, v13 ; 3E001A0A > v_mac_f32_e32 v1, s0, v13 ; 3E021A00 > exp 15, 33, 0, 0, 0, v0, v1, v0, v0 ; F800021F 00000100 > v_mac_f32_e32 v3, s16, v13 ; 3E061A10 > v_mac_f32_e32 v4, s6, v13 ; 3E081A06 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v0, 0 ; 7E000280 > exp 15, 12, 0, 1, 0, v3, v4, v0, v5 ; F80008CF 05000403 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 220 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v4, v3 ; 10060704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 84 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[8:11], s[10:11], 0x8 ; C0840B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > s_waitcnt vmcnt(0) ; BF8C0F70 > buffer_load_format_xyzw v[10:13], v5, s[12:15], 0 idxen ; E00C2000 80030A05 > buffer_load_format_xyzw v[3:6], v6, s[8:11], 0 idxen ; E00C2000 80020306 > s_buffer_load_dword s12, s[0:3], 0x2d ; C206012D > s_buffer_load_dword s11, s[0:3], 0x2c ; C205812C > s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 > s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 > s_buffer_load_dword s13, s[0:3], 0x2e ; C206812E > s_buffer_load_dword s17, s[0:3], 0x31 ; C2088131 > s_buffer_load_dword s16, s[0:3], 0x24 ; C2080124 > s_buffer_load_dword s7, s[0:3], 0x28 ; C2038128 > s_buffer_load_dword s14, s[0:3], 0x2f ; C207012F > s_buffer_load_dword s15, s[0:3], 0x30 ; C2078130 > s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126 > s_buffer_load_dword s9, s[0:3], 0x2a ; C204812A > s_buffer_load_dword s18, s[0:3], 0x32 ; C2090132 > s_buffer_load_dword s6, s[0:3], 0x27 ; C2030127 > s_buffer_load_dword s10, s[0:3], 0x2b ; C205012B > s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 > s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 > v_mul_f32_e32 v12, s12, v8 ; 1018100C > v_mac_f32_e32 
v12, s11, v7 ; 3E180E0B > v_mul_f32_e32 v0, s4, v8 ; 10001004 > v_mul_f32_e32 v1, s8, v8 ; 10021008 > v_mul_f32_e32 v8, s17, v8 ; 10101011 > v_mac_f32_e32 v12, s13, v9 ; 3E18120D > v_mac_f32_e32 v0, s16, v7 ; 3E000E10 > v_mac_f32_e32 v1, s7, v7 ; 3E020E07 > v_mac_f32_e32 v8, s15, v7 ; 3E100E0F > v_add_f32_e32 v7, s14, v12 ; 060E180E > v_mov_b32_e32 v12, 1.0 ; 7E1802F2 > v_mac_f32_e32 v0, s5, v9 ; 3E001205 > v_mac_f32_e32 v1, s9, v9 ; 3E021209 > v_mac_f32_e32 v8, s18, v9 ; 3E101212 > exp 15, 32, 0, 0, 0, v10, v11, v9, v12 ; F800020F 0C090B0A > v_add_f32_e32 v0, s6, v0 ; 06000006 > v_add_f32_e32 v1, s10, v1 ; 0602020A > v_add_f32_e32 v8, s0, v8 ; 06101000 > s_waitcnt vmcnt(0) ; BF8C0F70 > exp 15, 33, 0, 0, 0, v3, v4, v5, v6 ; F800021F 06050403 > exp 15, 12, 0, 1, 0, v0, v1, v7, v8 ; F80008CF 08070100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 16 >Code Size: 232 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 0, 1, [m0] ; C8100402 > v_interp_p2_f32 v4, [v4], v3, 0, 1, [m0] ; C8110403 > v_interp_p1_f32 v5, v2, 1, 1, [m0] ; C8140502 > v_interp_p2_f32 v5, [v5], v3, 1, 1, [m0] ; C8150503 > v_interp_p1_f32 v6, v2, 2, 1, [m0] ; C8180602 > v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; C8190603 > v_interp_p1_f32 v7, v2, 3, 1, [m0] ; C81C0702 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v7, [v7], v3, 3, 1, [m0] ; C81D0703 > image_sample v[8:11], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030800 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v0, v4, v8 ; 10001104 > v_mul_f32_e32 v1, v5, v9 ; 10021305 > v_mul_f32_e32 v2, v6, v10 ; 10041506 > v_mul_f32_e32 v3, v7, v11 ; 10061707 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 120 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL CONST[1][0..26] >DCL TEMP[0..5], LOCAL >IMM[0] UINT32 {0, 352, 208, 224} >IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} >IMM[2] UINT32 {240, 144, 160, 176} >IMM[3] UINT32 {192, 416, 320, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][22].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][22].zzzz, CONST[1][22].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][13], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][14], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[2].x, CONST[1][15], TEMP[0] > 8: MOV TEMP[1].z, TEMP[2].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][9], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][10], TEMP[1] > 
12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][11], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[2].x, CONST[1][12], TEMP[1] > 16: MOV TEMP[0].w, TEMP[2].xxxx > 17: MOV TEMP[3], TEMP[0] > 18: MOV TEMP[4].zw, TEMP[0].wwzw > 19: MUL TEMP[1].xy, TEMP[2].xxxx, CONST[1][26].zwww > 20: MUL TEMP[2].xy, TEMP[2].xxxx, CONST[1][20].xyyy > 21: MUL TEMP[5].xy, CONST[1][26].zwww, IMM[1].xyyy > 22: FMA TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy, TEMP[1].xyyy > 23: MOV TEMP[4].xy, TEMP[0].xyxx > 24: FMA TEMP[0].xy, TEMP[0].xyyy, CONST[1][20].zwww, TEMP[2].xyyy > 25: MOV OUT[2], TEMP[0] > 26: MOV OUT[1], TEMP[4] > 27: MOV OUT[0], TEMP[3] > 28: END >radeonsi: Compiling shader 396 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 144) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 148) > %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 152) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 156) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 160) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 164) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 168) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 172) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 176) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 180) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 184) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 188) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 192) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 196) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 200) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 204) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 220) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 236) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 240) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 244) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 248) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 252) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 320) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 324) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 328) > %47 = call float @llvm.SI.load.const(<16 x i8> %15, i32 332) > %48 = call float @llvm.SI.load.const(<16 x i8> %15, i32 352) > %49 = call float @llvm.SI.load.const(<16 x i8> %15, i32 356) > %50 = call float @llvm.SI.load.const(<16 x i8> %15, i32 360) > %51 = call float @llvm.SI.load.const(<16 
x i8> %15, i32 424) > %52 = call float @llvm.SI.load.const(<16 x i8> %15, i32 428) > %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 > %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %13) > %56 = extractelement <4 x float> %55, i32 0 > %57 = extractelement <4 x float> %55, i32 1 > %58 = extractelement <4 x float> %55, i32 2 > %59 = fmul float %56, %48 > %60 = fmul float %57, %49 > %61 = call float @llvm.fma.f32(float %58, float %50, float %50) > %62 = fmul float %32, %59 > %63 = fmul float %33, %60 > %64 = fadd float %62, %63 > %65 = fmul float %34, %61 > %66 = fadd float %64, %65 > %67 = fadd float %66, %35 > %68 = fmul float %36, %59 > %69 = fmul float %37, %60 > %70 = fadd float %68, %69 > %71 = fmul float %38, %61 > %72 = fadd float %70, %71 > %73 = fadd float %72, %39 > %74 = fmul float %40, %59 > %75 = fmul float %41, %60 > %76 = fadd float %74, %75 > %77 = fmul float %42, %61 > %78 = fadd float %76, %77 > %79 = fadd float %78, %43 > %80 = fmul float %16, %67 > %81 = fmul float %17, %73 > %82 = fadd float %80, %81 > %83 = fmul float %18, %79 > %84 = fadd float %82, %83 > %85 = fadd float %84, %19 > %86 = fmul float %20, %67 > %87 = fmul float %21, %73 > %88 = fadd float %86, %87 > %89 = fmul float %22, %79 > %90 = fadd float %88, %89 > %91 = fadd float %90, %23 > %92 = fmul float %24, %67 > %93 = fmul float %25, %73 > %94 = fadd float %92, %93 > %95 = fmul float %26, %79 > %96 = fadd float %94, %95 > %97 = fadd float %96, %27 > %98 = fmul float %28, %67 > %99 = fmul float %29, %73 > %100 = fadd float %98, %99 > %101 = fmul float %30, %79 > %102 = fadd float %100, %101 > %103 = fadd float %102, %31 > %104 = fmul float %103, %51 > %105 = fmul float %103, %52 > %106 = fmul float %103, %44 > %107 = fmul float %103, %45 > %108 = fsub float -0.000000e+00, %52 > %109 = call float @llvm.fma.f32(float %85, float %51, float %104) > %110 = call float @llvm.fma.f32(float %91, float %108, float %105) > %111 = call float @llvm.fma.f32(float %109, float %46, float %106) > %112 = call float @llvm.fma.f32(float %110, float %47, float %107) > %113 = bitcast i32 %11 to float > %114 = insertvalue <{ float, float, float }> undef, float %113, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %97, float %103) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %111, float %112, float %97, float %103) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %91, float %97, float %103) > ret <{ float, float, float }> %114 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL IN[1], GENERIC[1], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SAMP[1] >DCL 
SAMP[2] >DCL SAMP[3] >DCL SVIEW[0], 2D, FLOAT >DCL SVIEW[1], 2D, FLOAT >DCL SVIEW[2], 2D, FLOAT >DCL SVIEW[3], 2D, FLOAT >DCL CONST[1][0..26] >DCL TEMP[0..4], LOCAL >IMM[0] FLT32 { 0.0000, 158456325028528675187087900672.0000, -0.5020, 1.0000} >IMM[1] INT32 {1, 0, 0, 0} >IMM[2] UINT32 {0, 416, 96, 112} >IMM[3] UINT32 {128, 368, 80, 336} > 0: FSEQ TEMP[0].xy, IN[0].wwww, IMM[0].xxxx > 1: SSG TEMP[1].xy, IN[0].xyyy > 2: MUL TEMP[1].xy, IMM[0].yyyy, TEMP[1].xyyy > 3: RCP TEMP[2].xy, IN[0].wwww > 4: MUL TEMP[2].xy, IN[0].xyyy, TEMP[2].xyyy > 5: UCMP TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy, TEMP[2].xyyy > 6: MOV TEMP[1].xy, TEMP[0].xyyy > 7: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D > 8: MOV TEMP[2].xy, TEMP[0].xyyy > 9: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D > 10: MOV TEMP[2].z, TEMP[2].xxxx > 11: ADD TEMP[0].x, TEMP[1].wwww, IMM[0].zzzz > 12: FSLT TEMP[1].x, -TEMP[0].xxxx, IMM[0].xxxx > 13: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx > 14: INEG TEMP[1].x, TEMP[1].xxxx > 15: USNE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx > 16: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww > 17: KILL_IF -TEMP[1].xxxx > 18: MOV TEMP[2].w, IMM[0].wwww > 19: FSEQ TEMP[1].xy, IN[0].wwww, IMM[0].xxxx > 20: SSG TEMP[3].xy, IN[1].xyyy > 21: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[3].xyyy > 22: RCP TEMP[4].xy, IN[0].wwww > 23: MUL TEMP[4].xy, IN[1].xyyy, TEMP[4].xyyy > 24: UCMP TEMP[1].xy, TEMP[1].xyyy, TEMP[3].xyyy, TEMP[4].xyyy > 25: DP2 TEMP[3].x, TEMP[2].zwww, CONST[1][26].xyyy > 26: MUL TEMP[2].xy, TEMP[1].xyyy, TEMP[3].xxxx > 27: DP4 TEMP[1].x, CONST[1][6], TEMP[2] > 28: MOV_SAT TEMP[0].x, TEMP[1].xxxx > 29: DP4 TEMP[1].x, CONST[1][7], TEMP[2] > 30: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 31: MOV TEMP[0].y, TEMP[1].xxxx > 32: DP4 TEMP[1].x, CONST[1][8], TEMP[2] > 33: MOV_SAT TEMP[1].x, TEMP[1].xxxx > 34: MOV TEMP[0].z, TEMP[1].xxxx > 35: MOV TEMP[1].xy, TEMP[0].xyyy > 36: TEX TEMP[1], TEMP[1], SAMP[2], 2D > 37: MOV TEMP[0].w, IMM[0].xxxx > 38: MOV TEMP[2].xy, TEMP[0].wzzz > 39: TEX TEMP[2], TEMP[2], SAMP[3], 2D > 40: MUL TEMP[0], TEMP[2], TEMP[1] > 41: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][23].xyzz > 42: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][23].wwww > 43: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 44: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[1][5].xxxx > 45: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][21].xyzz > 46: MOV TEMP[1].w, TEMP[1].xxxx > 47: MOV TEMP[1].xyz, TEMP[0].xyzx > 48: MOV OUT[0], TEMP[1] > 49: END >radeonsi: Compiling shader 397 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 80) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 104) > %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 108) > %30 
= call float @llvm.SI.load.const(<16 x i8> %24, i32 112) > %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 116) > %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 120) > %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 124) > %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 128) > %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 132) > %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 136) > %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 140) > %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) > %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 340) > %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) > %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 368) > %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 372) > %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 376) > %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 380) > %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 416) > %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 420) > %47 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 > %49 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %50 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %49, i64 0, i64 3 > %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 > %52 = extractelement <8 x i32> %48, i32 7 > %53 = extractelement <4 x i32> %51, i32 0 > %54 = and i32 %53, %52 > %55 = insertelement <4 x i32> %51, i32 %54, i32 0 > %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2 > %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 > %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 7 > %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 > %61 = extractelement <8 x i32> %57, i32 7 > %62 = extractelement <4 x i32> %60, i32 0 > %63 = and i32 %62, %61 > %64 = insertelement <4 x i32> %60, i32 %63, i32 0 > %65 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4 > %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 > %67 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %68 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %67, i64 0, i64 11 > %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 > %70 = extractelement <8 x i32> %66, i32 7 > %71 = extractelement <4 x i32> %69, i32 0 > %72 = and i32 %71, %70 > %73 = insertelement <4 x i32> %69, i32 %72, i32 0 > %74 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6 > %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 > %76 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %77 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %76, i64 0, i64 15 > %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 > %79 = extractelement <8 x i32> %75, i32 7 > %80 = extractelement <4 x i32> %78, i32 0 > %81 = and i32 %80, %79 > %82 = insertelement <4 x i32> %78, i32 %81, i32 0 > %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %85 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> 
%8) > %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %88 = fcmp oeq float %85, 0.000000e+00 > %89 = fcmp oeq float %85, 0.000000e+00 > %90 = fcmp ogt float %83, 0.000000e+00 > %91 = select i1 %90, float 1.000000e+00, float %83 > %92 = fcmp oge float %91, 0.000000e+00 > %93 = fcmp ogt float %84, 0.000000e+00 > %94 = select i1 %93, float 1.000000e+00, float %84 > %95 = fcmp oge float %94, 0.000000e+00 > %.op = fmul float %91, 0x4600000000000000 > %96 = select i1 %92, float %.op, float 0xC600000000000000 > %.op20 = fmul float %94, 0x4600000000000000 > %97 = select i1 %95, float %.op20, float 0xC600000000000000 > %98 = fdiv float 1.000000e+00, %85 > %99 = fmul float %83, %98 > %100 = fmul float %84, %98 > %101 = select i1 %88, float %96, float %99 > %102 = select i1 %89, float %97, float %100 > %103 = bitcast float %101 to i32 > %104 = bitcast float %102 to i32 > %105 = insertelement <2 x i32> undef, i32 %103, i32 0 > %106 = insertelement <2 x i32> %105, i32 %104, i32 1 > %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %48, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %108 = extractelement <4 x float> %107, i32 3 > %109 = bitcast float %101 to i32 > %110 = bitcast float %102 to i32 > %111 = insertelement <2 x i32> undef, i32 %109, i32 0 > %112 = insertelement <2 x i32> %111, i32 %110, i32 1 > %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %57, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %114 = extractelement <4 x float> %113, i32 0 > %115 = fadd float %108, 0xBFE0101020000000 > %116 = fcmp ogt float %115, -0.000000e+00 > %117 = select i1 %116, float -1.000000e+00, float 0.000000e+00 > call void @llvm.AMDGPU.kill(float %117) > %118 = fcmp oeq float %85, 0.000000e+00 > %119 = fcmp oeq float %85, 0.000000e+00 > %120 = fcmp ogt float %86, 0.000000e+00 > %121 = select i1 %120, float 1.000000e+00, float %86 > %122 = fcmp oge float %121, 0.000000e+00 > %123 = fcmp ogt float %87, 0.000000e+00 > %124 = select i1 %123, float 1.000000e+00, float %87 > %125 = fcmp oge float %124, 0.000000e+00 > %.op21 = fmul float %121, 0x4600000000000000 > %126 = select i1 %122, float %.op21, float 0xC600000000000000 > %.op22 = fmul float %124, 0x4600000000000000 > %127 = select i1 %125, float %.op22, float 0xC600000000000000 > %128 = fdiv float 1.000000e+00, %85 > %129 = fmul float %86, %128 > %130 = fmul float %87, %128 > %131 = select i1 %118, float %126, float %129 > %132 = select i1 %119, float %127, float %130 > %133 = fmul float %114, %45 > %134 = fadd float %133, %46 > %135 = fmul float %131, %134 > %136 = fmul float %132, %134 > %137 = fmul float %26, %135 > %138 = fmul float %27, %136 > %139 = fadd float %137, %138 > %140 = fmul float %28, %114 > %141 = fadd float %139, %140 > %142 = fadd float %141, %29 > %143 = call float @llvm.AMDGPU.clamp.(float %142, float 0.000000e+00, float 1.000000e+00) > %144 = fmul float %30, %135 > %145 = fmul float %31, %136 > %146 = fadd float %144, %145 > %147 = fmul float %32, %114 > %148 = fadd float %146, %147 > %149 = fadd float %148, %33 > %150 = call float @llvm.AMDGPU.clamp.(float %149, float 0.000000e+00, float 1.000000e+00) > %151 = fmul float %34, %135 > %152 = fmul float %35, %136 > %153 = fadd float %151, %152 > %154 = fmul float %36, %114 > %155 = fadd float %153, %154 > %156 = fadd float %155, %37 > %157 = call float @llvm.AMDGPU.clamp.(float %156, 
float 0.000000e+00, float 1.000000e+00) > %158 = bitcast float %143 to i32 > %159 = bitcast float %150 to i32 > %160 = insertelement <2 x i32> undef, i32 %158, i32 0 > %161 = insertelement <2 x i32> %160, i32 %159, i32 1 > %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %66, <4 x i32> %73, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %163 = extractelement <4 x float> %162, i32 0 > %164 = extractelement <4 x float> %162, i32 1 > %165 = extractelement <4 x float> %162, i32 2 > %166 = extractelement <4 x float> %162, i32 3 > %167 = bitcast float %157 to i32 > %168 = insertelement <2 x i32> <i32 0, i32 undef>, i32 %167, i32 1 > %169 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %168, <8 x i32> %75, <4 x i32> %82, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %170 = extractelement <4 x float> %169, i32 0 > %171 = extractelement <4 x float> %169, i32 1 > %172 = extractelement <4 x float> %169, i32 2 > %173 = extractelement <4 x float> %169, i32 3 > %174 = fmul float %170, %163 > %175 = fmul float %171, %164 > %176 = fmul float %172, %165 > %177 = fmul float %173, %166 > %178 = fmul float %174, %41 > %179 = fmul float %175, %42 > %180 = fmul float %176, %43 > %181 = fmul float %178, %44 > %182 = fmul float %179, %44 > %183 = fmul float %180, %44 > %184 = fmul float %177, %181 > %185 = fmul float %177, %182 > %186 = fmul float %177, %183 > %187 = fmul float %184, %25 > %188 = fmul float %185, %25 > %189 = fmul float %186, %25 > %190 = fmul float %187, %38 > %191 = fmul float %188, %39 > %192 = fadd float %191, %190 > %193 = fmul float %189, %40 > %194 = fadd float %192, %193 > %195 = bitcast float %5 to i32 > %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %195, 10 > %197 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196, float %187, 11 > %198 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %197, float %188, 12 > %199 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %198, float %189, 13 > %200 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %199, float %194, 14 > %201 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %200, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %201 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >declare void @llvm.AMDGPU.kill(float) > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #2 
> >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } >attributes #2 = { readnone } > >!0 = !{!"const", null, i32 1} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_buffer_load_dword s4, s[0:3], 0x54 ; C2020154 > s_buffer_load_dword s26, s[0:3], 0xd9 ; C20D01D9 > s_buffer_load_dword s22, s[0:3], 0xd5 ; C20B01D5 > s_buffer_load_dword s25, s[0:3], 0xd8 ; C20C81D8 > s_buffer_load_dword s30, s[0:3], 0xdd ; C20F01DD > s_buffer_load_dword s21, s[0:3], 0xd4 ; C20A81D4 > s_buffer_load_dword s27, s[0:3], 0xda ; C20D81DA > s_buffer_load_dword s29, s[0:3], 0xdc ; C20E81DC > s_buffer_load_dword s23, s[0:3], 0xd6 ; C20B81D6 > s_buffer_load_dword s28, s[0:3], 0xdb ; C20E01DB > s_buffer_load_dword s31, s[0:3], 0xde ; C20F81DE > s_buffer_load_dword s6, s[0:3], 0xb9 ; C20301B9 > s_buffer_load_dword s10, s[0:3], 0xbd ; C20501BD > s_buffer_load_dword s14, s[0:3], 0xc1 ; C20701C1 > s_buffer_load_dword s18, s[0:3], 0xc5 ; C20901C5 > s_buffer_load_dword s24, s[0:3], 0xd7 ; C20C01D7 > s_buffer_load_dword s5, s[0:3], 0xb8 ; C20281B8 > s_buffer_load_dword s7, s[0:3], 0xba ; C20381BA > s_buffer_load_dword s8, s[0:3], 0xbb ; C20401BB > s_buffer_load_dword s9, s[0:3], 0xbc ; C20481BC > s_buffer_load_dword s11, s[0:3], 0xbe ; C20581BE > s_buffer_load_dword s12, s[0:3], 0xbf ; C20601BF > s_buffer_load_dword s13, s[0:3], 0xc0 ; C20681C0 > s_buffer_load_dword s15, s[0:3], 0xc2 ; C20781C2 > s_buffer_load_dword s16, s[0:3], 0xc3 ; C20801C3 > s_buffer_load_dword s17, s[0:3], 0xc4 ; C20881C4 > s_buffer_load_dword s19, s[0:3], 0xc6 ; C20981C6 > s_buffer_load_dword s20, s[0:3], 0xc7 ; C20A01C7 > s_buffer_load_dword s0, s[0:3], 0xdf ; C20001DF > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s4, v4 ; 10020804 > v_mul_f32_e32 v0, s4, v3 ; 10000604 > v_mul_f32_e32 v3, s4, v5 ; 10060A04 > v_mul_f32_e32 v5, s26, v1 ; 100A021A > v_mul_f32_e32 v4, s22, v1 ; 10080216 > v_mul_f32_e32 v1, s30, v1 ; 1002021E > v_mac_f32_e32 v5, s25, v0 ; 3E0A0019 > v_mac_f32_e32 v4, s21, v0 ; 3E080015 > v_mac_f32_e32 v1, s29, v0 ; 3E02001D > v_mac_f32_e32 v5, s27, v3 ; 3E0A061B > v_mac_f32_e32 v1, s31, v3 ; 3E02061F > v_mac_f32_e32 v4, s23, v3 ; 3E080617 > v_add_f32_e32 v3, s28, v5 ; 06060A1C > v_add_f32_e32 v0, s24, v4 ; 06000818 > v_mul_f32_e32 v4, s6, v3 ; 10080606 > v_mul_f32_e32 v5, s10, v3 ; 100A060A > v_mul_f32_e32 v6, s14, v3 ; 100C060E > v_mul_f32_e32 v3, s18, v3 ; 10060612 > v_add_f32_e32 v1, s0, v1 ; 06020200 > v_mac_f32_e32 v4, s5, v0 ; 3E080005 > v_mac_f32_e32 v5, s9, v0 ; 3E0A0009 > v_mac_f32_e32 v6, s13, v0 ; 3E0C000D > v_mac_f32_e32 v3, s17, v0 ; 3E060011 > v_mac_f32_e32 v3, s19, v1 ; 3E060213 > v_mac_f32_e32 v4, s7, v1 ; 3E080207 > v_mac_f32_e32 v5, s11, v1 ; 3E0A020B > v_mac_f32_e32 v6, s15, v1 ; 3E0C020F > v_add_f32_e32 v0, s8, v4 ; 06000808 > v_add_f32_e32 v1, s12, v5 ; 06020A0C > v_add_f32_e32 v4, s16, v6 ; 06080C10 > v_add_f32_e32 v3, s20, v3 ; 06060614 > exp 15, 12, 0, 1, 0, v0, v1, v4, v3 ; F80008CF 03040100 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 8 >Code Size: 284 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: >Shader epilog 
disassembly: > exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0020 >*** SHADER STATS *** >SGPRS: 14 >VGPRS: 15 >Code Size: 12 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304 > s_load_dwordx4 s[4:7], s[2:3], 0x4 ; C0820304 > s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s8, s[4:7], 0x54 ; C2040554 > s_buffer_load_dword s30, s[4:7], 0xd9 ; C20F05D9 > s_buffer_load_dword s26, s[4:7], 0xd5 ; C20D05D5 > s_buffer_load_dword s29, s[4:7], 0xd8 ; C20E85D8 > s_buffer_load_dword s34, s[4:7], 0xdd ; C21105DD > s_buffer_load_dword s25, s[4:7], 0xd4 ; C20C85D4 > s_buffer_load_dword s31, s[4:7], 0xda ; C20F85DA > s_buffer_load_dword s33, s[4:7], 0xdc ; C21085DC > s_buffer_load_dword s27, s[4:7], 0xd6 ; C20D85D6 > s_buffer_load_dword s32, s[4:7], 0xdb ; C21005DB > s_buffer_load_dword s35, s[4:7], 0xde ; C21185DE > s_buffer_load_dword s10, s[4:7], 0xb9 ; C20505B9 > s_buffer_load_dword s14, s[4:7], 0xbd ; C20705BD > s_buffer_load_dword s18, s[4:7], 0xc1 ; C20905C1 > s_buffer_load_dword s22, s[4:7], 0xc5 ; C20B05C5 > s_buffer_load_dword s28, s[4:7], 0xd7 ; C20E05D7 > s_buffer_load_dword s9, s[4:7], 0xb8 ; C20485B8 > s_buffer_load_dword s13, s[4:7], 0xbc ; C20685BC > s_buffer_load_dword s17, s[4:7], 0xc0 ; C20885C0 > s_buffer_load_dword s21, s[4:7], 0xc4 ; C20A85C4 > s_buffer_load_dword s36, s[4:7], 0xdf ; C21205DF > s_buffer_load_dword s11, s[4:7], 0xba ; C20585BA > s_buffer_load_dword s15, s[4:7], 0xbe ; C20785BE > s_buffer_load_dword s19, s[4:7], 0xc2 ; C20985C2 > s_buffer_load_dword s23, s[4:7], 0xc6 ; C20B85C6 > s_buffer_load_dword s12, s[4:7], 0xbb ; C20605BB > s_buffer_load_dword s16, s[4:7], 0xbf ; C20805BF > s_buffer_load_dword s20, s[4:7], 0xc3 ; C20A05C3 > s_buffer_load_dword s24, s[4:7], 0xc7 ; C20C05C7 > s_buffer_load_dword s37, s[4:7], 0xe4 ; C21285E4 > s_buffer_load_dword s38, s[4:7], 0xe5 ; C21305E5 > s_buffer_load_dword s4, s[4:7], 0xe6 ; C20205E6 > s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 > s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 > s_buffer_load_dword s7, s[0:3], 0x56 ; C2038156 > s_buffer_load_dword s39, s[0:3], 0x57 ; C2138157 > s_buffer_load_dword s40, s[0:3], 0x60 ; C2140160 > s_buffer_load_dword s0, s[0:3], 0x61 ; C2000161 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v1, s8, v4 ; 10020808 > v_mul_f32_e32 v0, s8, v3 ; 10000608 > v_mul_f32_e32 v3, s8, v5 ; 10060A08 > v_mul_f32_e32 v5, s30, v1 ; 100A021E > v_mul_f32_e32 v4, s26, v1 ; 1008021A > v_mul_f32_e32 v1, s34, v1 ; 10020222 > v_mac_f32_e32 v5, s29, v0 ; 3E0A001D > v_mac_f32_e32 v4, s25, v0 ; 3E080019 > v_mac_f32_e32 v1, s33, v0 ; 3E020021 > v_mac_f32_e32 v5, s31, v3 ; 3E0A061F > v_mac_f32_e32 v1, s35, v3 ; 3E020623 > v_mac_f32_e32 v4, s27, v3 ; 3E08061B > v_add_f32_e32 v3, s32, v5 ; 06060A20 > v_add_f32_e32 v0, s28, v4 ; 0600081C > v_mul_f32_e32 v4, s10, v3 ; 1008060A > v_mul_f32_e32 v5, s14, v3 ; 100A060E > v_mul_f32_e32 v6, s18, v3 ; 100C0612 > v_mul_f32_e32 v7, s22, v3 ; 100E0616 > v_add_f32_e32 v1, s36, v1 ; 06020224 > v_mac_f32_e32 v4, s9, v0 ; 3E080009 > v_mac_f32_e32 v5, s13, v0 ; 3E0A000D > v_mac_f32_e32 v6, s17, v0 ; 3E0C0011 > 
v_mac_f32_e32 v7, s21, v0 ; 3E0E0015 > v_mac_f32_e32 v4, s11, v1 ; 3E08020B > v_mac_f32_e32 v5, s15, v1 ; 3E0A020F > v_mac_f32_e32 v6, s19, v1 ; 3E0C0213 > v_mac_f32_e32 v7, s23, v1 ; 3E0E0217 > v_sub_f32_e32 v8, s37, v0 ; 08100025 > v_add_f32_e32 v0, s12, v4 ; 0600080C > v_add_f32_e32 v4, s16, v5 ; 06080A10 > v_add_f32_e32 v5, s20, v6 ; 060A0C14 > v_add_f32_e32 v6, s24, v7 ; 060C0E18 > v_mul_f32_e32 v11, s40, v6 ; 10160C28 > v_mul_f32_e32 v12, s0, v6 ; 10180C00 > v_mul_f32_e32 v7, s5, v6 ; 100E0C05 > v_fma_f32 v11, v0, s40, v11 ; D296000B 042C5100 > v_fma_f32 v12, v4, -s0, v12 ; D296000C 44300104 > v_mul_f32_e32 v10, s6, v6 ; 10140C06 > v_fma_f32 v7, v11, s7, v7 ; D2960007 041C0F0B > exp 15, 32, 0, 0, 0, v11, v12, v5, v6 ; F800020F 06050C0B > v_fma_f32 v10, v12, s39, v10 ; D296000A 04284F0C > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v11, 1.0 ; 7E1602F2 > v_sub_f32_e32 v3, s38, v3 ; 08060626 > v_sub_f32_e32 v9, s4, v1 ; 08120204 > exp 15, 33, 0, 0, 0, v7, v10, v1, v11 ; F800021F 0B010A07 > exp 15, 34, 0, 0, 0, v8, v3, v9, v0 ; F800022F 00090308 > exp 15, 12, 0, 1, 0, v0, v4, v5, v6 ; F80008CF 06050400 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 16 >Code Size: 420 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_cndmask_b32_e64 v9, v0, 1.0, vcc ; D2000009 01A9E500 > v_bfrev_b32_e32 v7, 14 ; 7E0E708E > v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v5, v2, 0, 1, [m0] ; C8140402 > v_cndmask_b32_e64 v10, v1, 1.0, vcc ; D200000A 01A9E501 > v_bfrev_b32_e32 v8, 15 ; 7E10708F > v_mul_f32_e32 v11, v7, v9 ; 10161307 > v_cmp_le_f32_e32 vcc, 0, v9 ; 7C061280 > v_cndmask_b32_e32 v9, v8, v11 ; 00121708 > v_interp_p2_f32 v5, [v5], v3, 0, 1, [m0] ; C8150403 > v_interp_p1_f32 v6, v2, 1, 1, [m0] ; C8180502 > v_mul_f32_e32 v11, v7, v10 ; 10161507 > v_cmp_le_f32_e32 vcc, 0, v10 ; 7C061480 > v_cndmask_b32_e32 v10, v8, v11 ; 00141708 > v_interp_p2_f32 v6, [v6], v3, 1, 1, [m0] ; C8190503 > v_interp_p1_f32 v11, v2, 0, 2, [m0] ; C82C0802 > v_interp_p2_f32 v11, [v11], v3, 0, 2, [m0] ; C82D0803 > v_interp_p1_f32 v12, v2, 1, 2, [m0] ; C8300902 > s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 > s_load_dwordx4 s[12:15], s[2:3], 0x8 ; C0860308 > v_interp_p2_f32 v12, [v12], v3, 1, 2, [m0] ; C8310903 > v_interp_p1_f32 v22, v2, 2, 2, [m0] ; C8580A02 > v_rcp_f32_e32 v2, v4 ; 7E045504 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_load_dwordx8 s[28:35], s[4:5], 0x0 ; C0CE0500 > v_cmp_eq_f32_e32 vcc, 0, v4 ; 7C040880 > v_mul_f32_e32 v0, v2, v0 ; 10000102 > v_mul_f32_e32 v1, v2, v1 ; 10020302 > v_cndmask_b32_e32 v25, v0, v9 ; 00321300 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s35 ; 87002300 > v_cndmask_b32_e32 v26, v1, v10 ; 00341501 > v_interp_p2_f32 v22, [v22], v3, 2, 2, [m0] ; C8590A03 > image_sample v0, v[25:26], s[28:35], s[0:3] dmask:0x1 ; F0800100 00070019 > v_cmp_lt_f32_e64 s[0:1], 0, v5 ; D0020000 00020A80 > v_cndmask_b32_e64 v1, v5, 1.0, s[0:1] ; D2000001 0001E505 > v_cmp_lt_f32_e64 s[0:1], 0, v6 ; D0020000 00020C80 > 
s_buffer_load_dword s7, s[16:19], 0x10 ; C2039110 > s_buffer_load_dword s20, s[16:19], 0x11 ; C20A1111 > s_buffer_load_dword s21, s[16:19], 0x12 ; C20A9112 > s_buffer_load_dword s22, s[16:19], 0x14 ; C20B1114 > s_buffer_load_dword s23, s[16:19], 0x15 ; C20B9115 > s_buffer_load_dword s24, s[16:19], 0x16 ; C20C1116 > s_buffer_load_dword s25, s[16:19], 0x18 ; C20C9118 > s_buffer_load_dword s26, s[16:19], 0x19 ; C20D1119 > s_buffer_load_dword s16, s[16:19], 0x1a ; C208111A > s_buffer_load_dword s18, s[12:15], 0x44 ; C2090D44 > v_cndmask_b32_e64 v3, v6, 1.0, s[0:1] ; D2000003 0001E506 > s_buffer_load_dword s17, s[12:15], 0x0 ; C2088D00 > s_buffer_load_dword s19, s[12:15], 0x45 ; C2098D45 > v_mul_f32_e32 v4, v7, v1 ; 10080307 > v_cmp_le_f32_e64 s[0:1], 0, v1 ; D0060000 00020280 > v_cndmask_b32_e64 v1, v8, v4, s[0:1] ; D2000001 00020908 > s_buffer_load_dword s27, s[12:15], 0x46 ; C20D8D46 > v_mul_f32_e32 v4, v7, v3 ; 10080707 > v_cmp_le_f32_e64 s[0:1], 0, v3 ; D0060000 00020680 > v_cndmask_b32_e64 v3, v8, v4, s[0:1] ; D2000003 00020908 > v_mul_f32_e32 v4, v2, v5 ; 10080B02 > v_mul_f32_e32 v2, v2, v6 ; 10040D02 > v_cndmask_b32_e32 v1, v4, v1 ; 00020304 > v_cndmask_b32_e32 v2, v2, v3 ; 00040702 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mad_f32 v1, -v1, v0, s18 ; D2820001 204A0101 > v_mad_f32 v2, -v2, v0, s19 ; D2820002 204E0102 > v_mul_f32_e32 v1, s17, v1 ; 10020211 > v_sub_f32_e32 v0, s27, v0 ; 0800001B > v_mul_f32_e32 v2, s17, v2 ; 10040411 > v_mul_f32_e32 v4, s7, v1 ; 10080207 > v_mul_f32_e32 v5, s22, v1 ; 100A0216 > s_load_dwordx8 s[36:43], s[4:5], 0x10 ; C0D20510 > s_buffer_load_dword s11, s[12:15], 0x40 ; C2058D40 > s_buffer_load_dword s9, s[12:15], 0x41 ; C2048D41 > s_buffer_load_dword s8, s[12:15], 0x42 ; C2040D42 > s_buffer_load_dword s6, s[12:15], 0x48 ; C2030D48 > s_buffer_load_dword s3, s[12:15], 0x49 ; C2018D49 > s_buffer_load_dword s2, s[12:15], 0x4a ; C2010D4A > s_load_dwordx4 s[12:15], s[4:5], 0x1c ; C086051C > v_mul_f32_e32 v0, s17, v0 ; 10000011 > v_mac_f32_e32 v4, s20, v2 ; 3E080414 > v_mac_f32_e32 v5, s23, v2 ; 3E0A0417 > v_mul_f32_e32 v6, s25, v1 ; 100C0219 > v_mac_f32_e32 v4, s21, v0 ; 3E080015 > v_mac_f32_e32 v6, s26, v2 ; 3E0C041A > v_mac_f32_e32 v5, s24, v0 ; 3E0A0018 > v_mul_f32_e32 v23, v4, v4 ; 102E0904 > v_mac_f32_e32 v6, s16, v0 ; 3E0C0010 > v_mac_f32_e32 v23, v5, v5 ; 3E2E0B05 > v_mov_b32_e32 v27, 0 ; 7E360280 > v_mac_f32_e32 v23, v6, v6 ; 3E2E0D06 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s12, s12, s43 ; 870C2B0C > v_mov_b32_e32 v29, v27 ; 7E3A031B > v_sqrt_f32_e32 v28, v23 ; 7E386717 > image_sample_l v[0:3], v[27:30], s[36:43], s[12:15] dmask:0xf ; F0900F00 0069001B > s_load_dwordx8 s[16:23], s[4:5], 0x20 ; C0C80520 > s_load_dwordx4 s[24:27], s[4:5], 0x2c ; C08C052C > s_load_dwordx4 s[12:15], s[4:5], 0x3c ; C086053C > s_load_dwordx8 s[28:35], s[4:5], 0x30 ; C0CE0530 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_rsq_clamp_f32_e32 v3, v23 ; 7E065917 > v_mul_f32_e32 v23, v11, v11 ; 102E170B > v_mac_f32_e32 v23, v12, v12 ; 3E2E190C > v_mac_f32_e32 v23, v22, v22 ; 3E2E2D16 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s24, s24, s23 ; 87181718 > s_and_b32 s12, s12, s35 ; 870C230C > image_sample v[14:17], v[25:26], s[16:23], s[24:27] dmask:0xf ; F0800F00 00C40E19 > image_sample v[18:21], v[25:26], s[28:35], s[12:15] dmask:0xf ; F0800F00 00671219 > v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v18, v18, 2.0, -1.0 ; D2960012 03CDE912 > v_mul_f32_e32 v24, v4, v3 ; 10300704 > v_mul_f32_e32 v28, v5, v3 ; 10380705 > v_mul_f32_e32 v29, v6, v3 
; 103A0706 > v_fma_f32 v19, v19, 2.0, -1.0 ; D2960013 03CDE913 > v_mul_f32_e32 v3, v18, v18 ; 10062512 > v_mac_f32_e32 v3, v19, v19 ; 3E062713 > v_fma_f32 v20, v20, 2.0, -1.0 ; D2960014 03CDE914 > v_fma_f32 v11, v11, v23, v24 ; D296000B 04622F0B > v_mac_f32_e32 v3, v20, v20 ; 3E062914 > s_load_dwordx4 s[36:39], s[4:5], 0x4c ; C092054C > s_load_dwordx8 s[40:47], s[4:5], 0x40 ; C0D40540 > s_load_dwordx8 s[12:19], s[4:5], 0x50 ; C0C60550 > s_load_dwordx4 s[20:23], s[4:5], 0x5c ; C08A055C > v_fma_f32 v12, v12, v23, v28 ; D296000C 04722F0C > v_fma_f32 v22, v22, v23, v29 ; D2960016 04762F16 > v_rsq_clamp_f32_e32 v23, v3 ; 7E2E5903 > v_mul_f32_e32 v3, v11, v11 ; 1006170B > v_mac_f32_e32 v3, v12, v12 ; 3E06190C > v_mac_f32_e32 v3, v22, v22 ; 3E062D16 > v_rsq_clamp_f32_e32 v30, v3 ; 7E3C5903 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s36, s36, s47 ; 87242F24 > s_and_b32 s20, s20, s19 ; 87141314 > image_sample v[7:10], v[25:26], s[40:47], s[36:39] dmask:0xf ; F0800F00 012A0719 > image_sample_l v[3:6], v[25:28], s[12:19], s[20:23] dmask:0xf ; F0900F00 00A30319 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, v18, v23 ; 100A2F12 > v_mul_f32_e32 v11, v11, v30 ; 10163D0B > v_mul_f32_e32 v18, v11, v5 ; 10240B0B > v_mul_f32_e32 v6, v19, v23 ; 100C2F13 > v_mul_f32_e32 v5, v24, v5 ; 100A0B18 > v_mul_f32_e32 v12, v12, v30 ; 10183D0C > v_mul_f32_e32 v11, v11, v24 ; 1016310B > v_mac_f32_e32 v18, v12, v6 ; 3E240D0C > v_mac_f32_e32 v11, v12, v28 ; 3E16390C > v_mac_f32_e32 v5, v28, v6 ; 3E0A0D1C > v_mul_f32_e32 v6, v20, v23 ; 100C2F14 > v_mul_f32_e32 v12, v22, v30 ; 10183D16 > v_mac_f32_e32 v18, v12, v6 ; 3E240D0C > v_mac_f32_e32 v5, v29, v6 ; 3E0A0D1D > v_add_f32_e64 v6, 0, v18 clamp ; D2060806 00022480 > v_log_f32_e32 v6, v6 ; 7E0C4F06 > v_mac_f32_e32 v11, v12, v29 ; 3E163B0C > v_mul_f32_e32 v18, v7, v7 ; 10240F07 > v_mov_b32_e32 v7, 0x3b83126f ; 7E0E02FF 3B83126F > v_mov_b32_e32 v12, 0x45800000 ; 7E1802FF 45800000 > v_fma_f32 v7, v18, v12, v7 ; D2960007 041E1912 > v_mul_f32_e32 v6, v7, v6 ; 100C0D07 > v_add_f32_e32 v7, v5, v17 ; 060E2305 > v_exp_f32_e32 v19, v6 ; 7E264B06 > v_add_f32_e64 v6, 0, v11 clamp ; D2060806 00021680 > v_add_f32_e32 v11, -1.0, v7 ; 06160EF3 > v_cmp_eq_f32_e32 vcc, 0, v17 ; 7C042280 > s_and_saveexec_b64 s[0:1], vcc ; BE80246A > s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E > v_cmp_lt_f32_e32 vcc, 0, v11 ; 7C021680 > v_cndmask_b32_e64 v7, v11, 1.0, vcc ; D2000007 01A9E50B > v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80 > v_mul_f32_e32 v7, 0x70000000, v7 ; 100E0EFF 70000000 > v_bfrev_b32_e32 v12, 15 ; 7E18708F > v_cndmask_b32_e32 v7, v12, v7 ; 000E0F0C > s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500 > s_xor_b64 exec, exec, s[0:1] ; 89FE007E > v_rcp_f32_e32 v7, v17 ; 7E0E5511 > v_mul_f32_e32 v7, v7, v11 ; 100E1707 > s_or_b64 exec, exec, s[0:1] ; 88FE007E > v_mov_b32_e32 v20, 0x40004189 ; 7E2802FF 40004189 > v_mov_b32_e32 v22, 0x45800000 ; 7E2C02FF 45800000 > v_fma_f32 v18, v18, v22, v20 ; D2960012 04522D12 > v_sub_f32_e32 v20, 1.0, v6 ; 08280CF2 > v_mad_f32 v20, -v6, v20, v20 ; D2820014 24522906 > v_mul_f32_e32 v18, 0x3e000000, v18 ; 102424FF 3E000000 > v_mul_f32_e32 v20, v20, v20 ; 10282914 > v_mul_f32_e32 v18, v18, v19 ; 10242712 > v_sub_f32_e32 v19, 1.0, v9 ; 082612F2 > v_mad_f32 v6, -v6, v20, v20 ; D2820006 24522906 > v_fma_f32 v6, v19, v6, v9 ; D2960006 04260D13 > v_mul_f32_e32 v9, s6, v3 ; 10120606 > v_mul_f32_e32 v6, v18, v6 ; 100C0D12 > v_mul_f32_e32 v18, s3, v3 ; 10240603 > v_mul_f32_e32 v3, s2, v3 ; 10060602 > v_mul_f32_e32 v11, s11, v14 ; 10161C0B > v_mul_f32_e32 v12, s9, v15 ; 10181E09 
> v_mul_f32_e32 v17, s8, v16 ; 10222008 > v_mul_f32_e32 v9, v9, v8 ; 10121109 > v_mul_f32_e32 v18, v18, v8 ; 10241112 > v_mul_f32_e32 v3, v3, v8 ; 10061103 > v_fma_f32 v8, v9, v6, v11 ; D2960008 042E0D09 > v_fma_f32 v9, v18, v6, v12 ; D2960009 04320D12 > v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 > v_fma_f32 v18, v3, v6, v17 ; D2960012 04460D03 > v_mul_f32_e32 v6, v8, v7 ; 100C0F08 > v_mul_f32_e32 v3, v9, v7 ; 10060F09 > v_mul_f32_e32 v7, v18, v7 ; 100E0F12 > v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 > s_and_saveexec_b64 s[12:13], vcc ; BE8C246A > s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E > s_cbranch_execz BB0_6 ; BF880000 > v_add_f32_e32 v8, v11, v11 ; 0610170B > v_add_f32_e32 v11, v17, v17 ; 06162311 > v_max3_f32 v17, v16, v15, v14 ; D2A80011 043A1F10 > v_rcp_f32_e32 v18, v17 ; 7E245511 > v_add_f32_e32 v9, v12, v12 ; 0612190C > v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80 > v_mov_b32_e32 v12, 0x3f028283 ; 7E1802FF 3F028283 > v_cmp_lt_f32_e64 s[0:1], 0, v15 ; D0020000 00021E80 > v_cmp_lt_f32_e64 s[2:3], 0, v16 ; D0020002 00022080 > v_cmp_gt_f32_e64 s[4:5], v12, v21 ; D0080004 00022B0C > v_cndmask_b32_e64 v20, v14, 1.0, vcc ; D2000014 01A9E50E > v_cndmask_b32_e64 v21, v15, 1.0, s[0:1] ; D2000015 0001E50F > v_cmp_eq_f32_e32 vcc, 0, v17 ; 7C042280 > v_mul_f32_e32 v14, v18, v14 ; 101C1D12 > v_mul_f32_e32 v15, v18, v15 ; 101E1F12 > v_mul_f32_e32 v18, v18, v16 ; 10242112 > v_cndmask_b32_e64 v16, v16, 1.0, s[2:3] ; D2000010 0009E510 > v_bfrev_b32_e32 v17, 14 ; 7E22708E > v_cmp_le_f32_e64 s[0:1], 0, v20 ; D0060000 00022880 > v_cmp_le_f32_e64 s[2:3], 0, v21 ; D0060002 00022A80 > v_cmp_le_f32_e64 s[6:7], 0, v16 ; D0060006 00022080 > v_mul_f32_e32 v20, v17, v20 ; 10282911 > v_mul_f32_e32 v21, v17, v21 ; 102A2B11 > v_mul_f32_e32 v16, v17, v16 ; 10202111 > v_bfrev_b32_e32 v17, 15 ; 7E22708F > v_cndmask_b32_e64 v20, v17, v20, s[0:1] ; D2000014 00022911 > v_cndmask_b32_e32 v14, v14, v20 ; 001C290E > v_cndmask_b32_e64 v21, v17, v21, s[2:3] ; D2000015 000A2B11 > v_cndmask_b32_e64 v16, v17, v16, s[6:7] ; D2000010 001A2111 > v_cndmask_b32_e32 v15, v15, v21 ; 001E2B0F > v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 > v_cndmask_b32_e32 v16, v18, v16 ; 00202112 > v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 > v_add_f32_e64 v17, 0, s11 clamp ; D2060811 00001680 > v_mul_f32_e32 v14, v14, v14 ; 101C1D0E > v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 > v_mul_f32_e32 v14, v17, v14 ; 101C1D11 > v_add_f32_e64 v17, 0, s9 clamp ; D2060811 00001280 > v_mul_f32_e32 v15, v15, v15 ; 101E1F0F > v_mul_f32_e32 v15, v17, v15 ; 101E1F11 > v_add_f32_e64 v17, 0, s8 clamp ; D2060811 00001080 > v_mul_f32_e32 v16, v16, v16 ; 10202110 > v_add_f32_e32 v12, -0.5, v10 ; 061814F1 > v_min_f32_e32 v18, 0.5, v10 ; 1E2414F0 > v_mov_b32_e32 v23, 0x3e800000 ; 7E2E02FF 3E800000 > v_mul_f32_e32 v16, v17, v16 ; 10202111 > v_mov_b32_e32 v17, 0x3e19999a ; 7E2202FF 3E19999A > v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 > v_subrev_f32_e32 v24, v5, v23 ; 0A302F05 > v_cndmask_b32_e64 v10, v18, v10, s[4:5] ; D200000A 00121512 > v_mul_f32_e32 v14, v17, v14 ; 101C1D11 > v_mul_f32_e32 v15, v17, v15 ; 101E1F11 > v_mul_f32_e32 v16, v17, v16 ; 10202111 > v_sub_f32_e32 v17, 1.0, v5 ; 08220AF2 > v_mul_f32_e32 v19, v8, v12 ; 10261908 > v_mul_f32_e32 v22, v9, v12 ; 102C1909 > v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 > v_mul_f32_e32 v12, v11, v12 ; 1018190B > v_cndmask_b32_e64 v8, v14, v8, s[4:5] ; D2000008 0012110E > v_min_f32_e32 v10, 0.5, v10 ; 1E1414F0 > v_cndmask_b32_e64 v9, v15, v9, s[4:5] ; D2000009 0012130F > 
v_cndmask_b32_e64 v11, v16, v11, s[4:5] ; D200000B 00121710 > v_fma_f32 v19, v19, v24, v6 ; D2960013 041A3113 > v_fma_f32 v22, v22, v24, v3 ; D2960016 040E3116 > v_fma_f32 v12, v12, v24, v7 ; D296000C 041E310C > v_add_f32_e32 v5, v23, v5 ; 060A0B17 > v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 > v_mul_f32_e32 v8, v8, v10 ; 10101508 > v_mul_f32_e32 v9, v9, v10 ; 10121509 > v_mul_f32_e32 v10, v11, v10 ; 1014150B > v_cndmask_b32_e64 v6, v6, v19, s[4:5] ; D2000006 00122706 > v_cndmask_b32_e64 v3, v3, v22, s[4:5] ; D2000003 00122D03 > v_cndmask_b32_e64 v7, v7, v12, s[4:5] ; D2000007 00121907 > v_mul_f32_e32 v8, v8, v17 ; 10102308 > v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 > v_mul_f32_e32 v9, v9, v17 ; 10122309 > v_mul_f32_e32 v10, v10, v17 ; 1014230A > v_fma_f32 v6, v8, v5, v6 ; D2960006 041A0B08 > v_fma_f32 v3, v9, v5, v3 ; D2960003 040E0B09 > v_fma_f32 v7, v10, v5, v7 ; D2960007 041E0B0A > s_or_b64 exec, exec, s[12:13] ; 88FE0C7E > v_mul_f32_e32 v1, v3, v1 ; 10020303 > v_mul_f32_e32 v0, v6, v0 ; 10000106 > v_mul_f32_e32 v2, v7, v2 ; 10040507 > v_mul_f32_e32 v0, v0, v4 ; 10000900 > v_mul_f32_e32 v1, v1, v4 ; 10020901 > v_mul_f32_e32 v2, v2, v4 ; 10040902 > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 56 >VGPRS: 32 >Code Size: 1568 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 8 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL OUT[0], POSITION >DCL CONST[1][0..25] >DCL TEMP[0..2], LOCAL >IMM[0] UINT32 {0, 400, 256, 272} >IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} >IMM[2] UINT32 {288, 192, 208, 224} >IMM[3] UINT32 {240, 0, 0, 0} > 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1][25].xyyy > 1: FMA TEMP[1].x, IN[0].zzzz, CONST[1][25].zzzz, CONST[1][25].zzzz > 2: MOV TEMP[0].z, TEMP[1].xxxx > 3: MOV TEMP[0].w, IMM[1].xxxx > 4: DP4 TEMP[1].x, CONST[1][16], TEMP[0] > 5: DP4 TEMP[2].x, CONST[1][17], TEMP[0] > 6: MOV TEMP[1].y, TEMP[2].xxxx > 7: DP4 TEMP[0].x, CONST[1][18], TEMP[0] > 8: MOV TEMP[1].z, TEMP[0].xxxx > 9: MOV TEMP[1].w, IMM[1].xxxx > 10: DP4 TEMP[0].x, CONST[1][12], TEMP[1] > 11: DP4 TEMP[2].x, CONST[1][13], TEMP[1] > 12: MOV TEMP[0].y, TEMP[2].xxxx > 13: DP4 TEMP[2].x, CONST[1][14], TEMP[1] > 14: MOV TEMP[0].z, TEMP[2].xxxx > 15: DP4 TEMP[1].x, CONST[1][15], TEMP[1] > 16: MOV TEMP[0].w, TEMP[1].xxxx > 17: MOV OUT[0], TEMP[0] > 18: END >radeonsi: Compiling shader 398 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { >main_body: > %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 > %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 192) > %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 196) > %18 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 200) > %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 204) > %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 208) > %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 212) > %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 216) > %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 220) > %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 224) > %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 228) > %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 232) > %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 236) > %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 240) > %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 244) > %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 248) > %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 252) > %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 256) > %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 260) > %34 = call float @llvm.SI.load.const(<16 x i8> %15, i32 264) > %35 = call float @llvm.SI.load.const(<16 x i8> %15, i32 268) > %36 = call float @llvm.SI.load.const(<16 x i8> %15, i32 272) > %37 = call float @llvm.SI.load.const(<16 x i8> %15, i32 276) > %38 = call float @llvm.SI.load.const(<16 x i8> %15, i32 280) > %39 = call float @llvm.SI.load.const(<16 x i8> %15, i32 284) > %40 = call float @llvm.SI.load.const(<16 x i8> %15, i32 288) > %41 = call float @llvm.SI.load.const(<16 x i8> %15, i32 292) > %42 = call float @llvm.SI.load.const(<16 x i8> %15, i32 296) > %43 = call float @llvm.SI.load.const(<16 x i8> %15, i32 300) > %44 = call float @llvm.SI.load.const(<16 x i8> %15, i32 400) > %45 = call float @llvm.SI.load.const(<16 x i8> %15, i32 404) > %46 = call float @llvm.SI.load.const(<16 x i8> %15, i32 408) > %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 > %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %13) > %50 = extractelement <4 x float> %49, i32 0 > %51 = extractelement <4 x float> %49, i32 1 > %52 = extractelement <4 x float> %49, i32 2 > %53 = fmul float %50, %44 > %54 = fmul float %51, %45 > %55 = call float @llvm.fma.f32(float %52, float %46, float %46) > %56 = fmul float %32, %53 > %57 = fmul float %33, %54 > %58 = fadd float %56, %57 > %59 = fmul float %34, %55 > %60 = fadd float %58, %59 > %61 = fadd float %60, %35 > %62 = fmul float %36, %53 > %63 = fmul float %37, %54 > %64 = fadd float %62, %63 > %65 = fmul float %38, %55 > %66 = fadd float %64, %65 > %67 = fadd float %66, %39 > %68 = fmul float %40, %53 > %69 = fmul float %41, %54 > %70 = fadd float %68, %69 > %71 = fmul float %42, %55 > %72 = fadd float %70, %71 > %73 = fadd float %72, %43 > %74 = fmul float %16, %61 > %75 = fmul float %17, %67 > %76 = fadd float %74, %75 > %77 = fmul float %18, %73 > %78 = fadd float %76, %77 > %79 = fadd float %78, %19 > %80 = fmul float %20, %61 > %81 = fmul float %21, %67 > %82 = fadd float %80, %81 > %83 = fmul float %22, %73 > %84 = fadd float %82, %83 > %85 = fadd float %84, %23 > %86 = fmul float %24, %61 > %87 = fmul float %25, %67 > %88 = fadd float %86, %87 > %89 = fmul float %26, %73 > %90 = fadd float %88, %89 > %91 = fadd float %90, %27 > %92 = fmul float %28, %61 > %93 = fmul float %29, %67 > %94 = fadd float %92, %93 > %95 = fmul float %30, %73 > %96 = fadd float %94, %95 > %97 = fadd float %96, %31 > %98 = bitcast i32 %11 to float > %99 = insertvalue <{ float, float, 
float }> undef, float %98, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %79, float %85, float %91, float %97) > ret <{ float, float, float }> %99 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 > s_buffer_load_dword s23, s[0:3], 0x11 ; C20B8111 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 > s_buffer_load_dword s22, s[0:3], 0x10 ; C20B0110 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 > s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s21, s[0:3], 0xf ; C20A810F > s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 > s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 > s_waitcnt vmcnt(1) ; BF8C0F71 > exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 > s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 > v_mul_f32_e32 v0, s23, v11 ; 10001617 > v_mul_f32_e32 v1, s20, v11 ; 10021614 > v_mul_f32_e32 v3, s5, v11 ; 10061605 > v_mul_f32_e32 v4, s9, v11 ; 10081609 > v_mul_f32_e32 v5, s13, v11 ; 100A160D > v_mul_f32_e32 v11, s17, v11 ; 10161611 > v_mac_f32_e32 v0, s22, v10 ; 3E001416 > v_mac_f32_e32 v1, s19, v10 ; 3E021413 > v_mac_f32_e32 v3, s4, v10 ; 3E061404 > v_mac_f32_e32 v4, s8, v10 ; 3E081408 > v_mac_f32_e32 v5, s12, v10 ; 3E0A140C > v_mac_f32_e32 v11, s16, v10 ; 3E161410 > v_mac_f32_e32 v0, s24, v12 ; 3E001818 > v_mac_f32_e32 v1, s26, v12 ; 3E02181A > v_mac_f32_e32 v3, s6, v12 ; 3E061806 > v_mac_f32_e32 v4, s10, v12 ; 3E08180A > v_mac_f32_e32 v5, s14, v12 ; 3E0A180E > v_mac_f32_e32 v11, s18, v12 ; 3E161812 > v_mac_f32_e32 v0, s25, v13 ; 3E001A19 > v_mac_f32_e32 v1, s0, v13 ; 3E021A00 > v_mac_f32_e32 v3, s7, v13 ; 3E061A07 > v_mac_f32_e32 v4, s11, v13 ; 3E081A0B > v_mac_f32_e32 v5, s15, v13 ; 3E0A1A0F > v_mac_f32_e32 v11, s21, v13 ; 3E161A15 > exp 15, 33, 0, 0, 0, v0, v1, v0, v0 ; F800021F 00000100 > exp 15, 12, 0, 1, 0, v3, v4, v5, v11 ; F80008CF 0B050403 > s_waitcnt expcnt(0) ; 
BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 16 >Code Size: 272 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v4, v3 ; 10060704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 84 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > v_mov_b32_e32 v1, 0x3dcccccd ; 7E0202FF 3DCCCCCD > v_mov_b32_e32 v3, 0x3f8ccccd ; 7E0602FF 3F8CCCCD > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_lo_i32 v0, v10, 6 ; D2D60000 00010D0A > v_or_b32_e32 v4, 1, v0 ; 38080081 > v_cvt_f32_u32_e32 v5, v0 ; 7E0A0D00 > v_add_i32_e32 v10, vcc, 2, v0 ; 4A140082 > v_add_i32_e32 v0, vcc, 4, v0 ; 4A000084 > v_cvt_f32_u32_e32 v4, v4 ; 7E080D04 > v_cvt_f32_u32_e32 v10, v10 ; 7E140D0A > v_cvt_f32_u32_e32 v0, v0 ; 7E000D00 > v_add_f32_e32 v5, v1, v5 ; 060A0B01 > v_add_f32_e32 v4, v1, v4 ; 06080901 > v_add_f32_e32 v11, v1, v10 ; 06161501 > v_add_f32_e32 v1, v1, v0 ; 06020101 > v_add_f32_e32 v0, v3, v0 ; 06000103 > v_add_f32_e32 v10, v3, v10 ; 06141503 > v_cvt_u32_f32_e32 v5, v5 ; 7E0A0F05 > v_cvt_u32_f32_e32 v1, v1 ; 7E020F01 > v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 > v_cvt_u32_f32_e32 v3, v4 ; 7E060F04 > v_cvt_u32_f32_e32 v4, v11 ; 7E080F0B > v_cvt_u32_f32_e32 v10, v10 ; 7E140F0A > v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 > v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 > v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 > v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 > v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 > v_or_b32_e32 v12, 4, v5 ; 38180A84 > v_or_b32_e32 v13, 8, v5 ; 381A0A88 > v_or_b32_e32 v18, 4, v1 ; 38240284 > v_or_b32_e32 v21, 4, v0 ; 382A0084 > buffer_load_dword v11, v5, s[0:3], 0 offen ; E0301000 80000B05 > v_or_b32_e32 v5, 12, v5 ; 380A0A8C > v_or_b32_e32 v23, 4, v4 ; 382E0884 > v_or_b32_e32 v24, 4, v10 ; 38301484 > buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C > buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D > buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 > buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 
80001212 > buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 > v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 > v_or_b32_e32 v19, 8, v1 ; 38260288 > v_or_b32_e32 v22, 8, v0 ; 382C0088 > buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 > buffer_load_dword v20, v0, s[0:3], 0 offen ; E0301000 80001400 > buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 > buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 > v_or_b32_e32 v15, 4, v3 ; 381E0684 > v_or_b32_e32 v16, 8, v3 ; 38200688 > v_or_b32_e32 v1, 12, v1 ; 3802028C > v_or_b32_e32 v0, 12, v0 ; 3800008C > v_or_b32_e32 v26, 8, v4 ; 38340888 > v_or_b32_e32 v28, 8, v10 ; 38381488 > buffer_load_dword v14, v3, s[0:3], 0 offen ; E0301000 80000E03 > v_or_b32_e32 v3, 12, v3 ; 3806068C > buffer_load_dword v25, v4, s[0:3], 0 offen ; E0301000 80001904 > buffer_load_dword v27, v10, s[0:3], 0 offen ; E0301000 80001B0A > buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 > buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 > v_or_b32_e32 v4, 12, v4 ; 3808088C > v_or_b32_e32 v10, 12, v10 ; 3814148C > buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F > buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 > buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 > buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A > buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C > buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 > buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 > buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 > buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A > s_waitcnt ; BF8C0F7F > exp 15, 32, 0, 0, 0, v11, v12, v13, v5 ; F800020F 050D0C0B > s_waitcnt expcnt(0) ; BF8C0F0F > v_mul_f32_e32 v5, v18, v7 ; 100A0F12 > v_mul_f32_e32 v11, v21, v7 ; 10160F15 > v_mac_f32_e32 v5, v17, v6 ; 3E0A0D11 > v_mac_f32_e32 v11, v20, v6 ; 3E160D14 > v_mul_f32_e32 v12, v23, v7 ; 10180F17 > s_waitcnt vmcnt(14) ; BF8C0F7E > v_mul_f32_e32 v7, v24, v7 ; 100E0F18 > s_waitcnt vmcnt(12) ; BF8C0F7C > v_mac_f32_e32 v12, v25, v6 ; 3E180D19 > s_waitcnt vmcnt(11) ; BF8C0F7B > v_mac_f32_e32 v7, v27, v6 ; 3E0E0D1B > s_waitcnt vmcnt(10) ; BF8C0F7A > v_mac_f32_e32 v5, v19, v8 ; 3E0A1113 > s_waitcnt vmcnt(9) ; BF8C0F79 > v_mac_f32_e32 v11, v22, v8 ; 3E161116 > s_waitcnt vmcnt(6) ; BF8C0F76 > exp 15, 33, 0, 0, 0, v14, v15, v16, v3 ; F800021F 03100F0E > s_waitcnt vmcnt(5) ; BF8C0F75 > v_mac_f32_e32 v12, v26, v8 ; 3E18111A > s_waitcnt vmcnt(4) ; BF8C0F74 > v_mac_f32_e32 v7, v28, v8 ; 3E0E111C > s_waitcnt vmcnt(3) ; BF8C0F73 > v_mac_f32_e32 v5, v1, v9 ; 3E0A1301 > s_waitcnt vmcnt(2) ; BF8C0F72 > v_mac_f32_e32 v11, v0, v9 ; 3E161300 > s_waitcnt expcnt(0) ; BF8C0F0F > v_mov_b32_e32 v3, 1.0 ; 7E0602F2 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mac_f32_e32 v12, v4, v9 ; 3E181304 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mac_f32_e32 v7, v10, v9 ; 3E0E130A > exp 15, 34, 0, 0, 0, v5, v11, v19, v1 ; F800022F 01130B05 > v_mov_b32_e32 v0, 0 ; 7E000280 > exp 15, 12, 0, 1, 0, v12, v7, v0, v3 ; F80008CF 0300070C > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 17 >VGPRS: 32 >Code Size: 604 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 8 >******************** > >Pixel Shader: >Shader main disassembly: > s_mov_b32 m0, s11 ; BEFC030B > s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 > v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 > v_interp_p2_f32 v0, [v0], v3, 0, 
0, [m0] ; C8010003 > v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 > v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 > v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 > v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 > v_interp_p1_f32 v5, v2, 3, 0, [m0] ; C8140302 > v_interp_p2_f32 v5, [v5], v3, 3, 0, [m0] ; C8150303 > v_interp_p1_f32 v6, v2, 0, 1, [m0] ; C8180402 > v_interp_p2_f32 v6, [v6], v3, 0, 1, [m0] ; C8190403 > v_interp_p1_f32 v7, v2, 1, 1, [m0] ; C81C0502 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_buffer_load_dword s7, s[16:19], 0x0 ; C2039100 > s_buffer_load_dword s8, s[16:19], 0x1 ; C2041101 > s_buffer_load_dword s0, s[16:19], 0x2 ; C2001102 > s_buffer_load_dword s9, s[16:19], 0x3 ; C2049103 > s_buffer_load_dword s12, s[16:19], 0x4 ; C2061104 > s_buffer_load_dword s13, s[16:19], 0x5 ; C2069105 > s_buffer_load_dword s1, s[16:19], 0x8 ; C2009108 > s_buffer_load_dword s2, s[16:19], 0x9 ; C2011109 > s_buffer_load_dword s3, s[16:19], 0xa ; C201910A > s_buffer_load_dword s6, s[16:19], 0xb ; C203110B > s_buffer_load_dword s14, s[16:19], 0xc ; C207110C > s_buffer_load_dword s15, s[16:19], 0xd ; C207910D > s_buffer_load_dword s20, s[16:19], 0x10 ; C20A1110 > s_buffer_load_dword s21, s[16:19], 0x11 ; C20A9111 > s_load_dwordx4 s[16:19], s[4:5], 0x1c ; C088051C > s_load_dwordx8 s[24:31], s[4:5], 0x10 ; C0CC0510 > s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C > s_load_dwordx8 s[36:43], s[4:5], 0x0 ; C0D20500 > v_interp_p2_f32 v7, [v7], v3, 1, 1, [m0] ; C81D0503 > v_interp_p1_f32 v9, v2, 2, 1, [m0] ; C8240602 > v_interp_p2_f32 v9, [v9], v3, 2, 1, [m0] ; C8250603 > v_interp_p1_f32 v8, v2, 3, 1, [m0] ; C8200702 > v_interp_p2_f32 v8, [v8], v3, 3, 1, [m0] ; C8210703 > v_interp_p1_f32 v10, v2, 0, 2, [m0] ; C8280802 > v_interp_p2_f32 v10, [v10], v3, 0, 2, [m0] ; C8290803 > v_interp_p1_f32 v2, v2, 1, 2, [m0] ; C8080902 > v_bfrev_b32_e32 v11, 1 ; 7E167081 > v_interp_p2_f32 v2, [v2], v3, 1, 2, [m0] ; C8090903 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_xor_b32_e32 v3, s7, v11 ; 3A061607 > s_and_b32 s32, s32, s43 ; 87202B20 > s_and_b32 s16, s16, s31 ; 87101F10 > v_xor_b32_e32 v11, s8, v11 ; 3A161608 > v_mov_b32_e32 v12, 0 ; 7E180280 > v_mov_b32_e32 v14, 0 ; 7E1C0280 > v_cmp_nlt_f32_e32 vcc, s7, v3 ; 7C1C0607 > s_and_b64 vcc, exec, vcc ; 87EA6A7E > s_cbranch_vccnz BB0_3 ; BF870000 > s_branch BB0_2 ; BF820000 > v_add_f32_e32 v15, s12, v3 ; 061E060C > v_fma_f32 v15, v15, s20, v10 ; D296000F 0428290F > v_mov_b32_e32 v17, v11 ; 7E22030B > v_cmp_nlt_f32_e32 vcc, s8, v17 ; 7C1C2208 > s_and_b64 vcc, exec, vcc ; 87EA6A7E > s_cbranch_vccnz BB0_6 ; BF870000 > s_branch BB0_5 ; BF820000 > v_add_f32_e32 v16, s13, v17 ; 0620220D > v_mov_b32_e32 v21, v18 ; 7E2A0312 > v_fma_f32 v16, v16, s21, v2 ; D2960010 04082B10 > v_mov_b32_e32 v20, v17 ; 7E280311 > v_mov_b32_e32 v18, v15 ; 7E24030F > v_mov_b32_e32 v19, v16 ; 7E260310 > v_mov_b32_e32 v20, v12 ; 7E28030C > image_sample_l v16, v[18:21], s[36:43], s[32:35] dmask:0x8 ; F0900800 01091012 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_add_f32_e32 v14, v14, v16 ; 061C210E > v_add_f32_e32 v17, 1.0, v17 ; 062222F2 > s_branch BB0_4 ; BF820000 > v_add_f32_e32 v3, 1.0, v3 ; 060606F2 > s_branch BB0_1 ; BF820000 > v_mul_f32_e32 v3, s9, v14 ; 10061C09 > v_mul_f32_e32 v14, s14, v10 ; 101C140E > v_mul_f32_e32 v15, s15, v2 ; 101E040F > v_mov_b32_e32 v16, 0 ; 7E200280 > image_sample_l v[14:17], v[14:17], s[24:31], s[16:19] dmask:0xf ; F0900F00 00860E0E > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mad_f32 v2, -v17, v3, v3 ; D2820002 240E0711 > v_mul_f32_e32 v2, s0, v2 ; 10040400 > v_add_f32_e64 v2, 0, v2 
clamp ; D2060802 00020480 > v_fma_f32 v3, s1, v2, v14 ; D2960003 043A0401 > v_fma_f32 v10, s2, v2, v15 ; D296000A 043E0402 > v_fma_f32 v11, s3, v2, v16 ; D296000B 04420403 > v_mul_f32_e32 v3, v6, v3 ; 10060706 > v_mul_f32_e32 v6, v7, v10 ; 100C1507 > v_mul_f32_e32 v7, v9, v11 ; 100E1709 > v_fma_f32 v2, s6, v2, v17 ; D2960002 04460406 > v_mul_f32_e32 v3, v8, v3 ; 10060708 > v_mul_f32_e32 v6, v8, v6 ; 100C0D08 > v_mul_f32_e32 v7, v8, v7 ; 100E0F08 > v_mul_f32_e32 v8, v8, v2 ; 10100508 > v_fma_f32 v0, v0, v8, v3 ; D2960000 040E1100 > v_fma_f32 v1, v1, v8, v6 ; D2960001 041A1101 > v_fma_f32 v2, v4, v8, v7 ; D2960002 041E1104 > v_fma_f32 v3, v5, v8, v8 ; D2960003 04221105 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 48 >VGPRS: 24 >Code Size: 468 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 > v_mov_b32_e32 v6, v4 ; 7E0C0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[12:15], s[10:11], 0x4 ; C0860B04 > s_load_dwordx4 s[8:11], s[10:11], 0x8 ; C0840B08 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 > buffer_load_format_xyzw v[11:14], v5, s[12:15], 0 idxen ; E00C2000 80030B05 > buffer_load_format_xyzw v[3:6], v6, s[8:11], 0 idxen ; E00C2000 80020306 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 > s_buffer_load_dword s25, s[0:3], 0x15 ; C20C8115 > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 > s_buffer_load_dword s24, s[0:3], 0x14 ; C20C0114 > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 > s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s23, s[0:3], 0x13 ; C20B8113 > s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s4 ; 7E000204 > v_mov_b32_e32 v1, s5 ; 7E020205 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_mov_b32_e32 v13, s6 ; 7E1A0206 > v_mov_b32_e32 v14, s7 ; 7E1C0207 > v_fma_f32 v0, s8, v7, v0 ; D2960000 04020E08 > v_fma_f32 v7, s10, v9, v13 ; D2960007 0436120A > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v9, s13, v4 ; 1012080D > v_mul_f32_e32 v13, s21, v4 ; 101A0815 > v_fma_f32 v1, s9, v8, v1 ; D2960001 04061009 > v_fma_f32 v8, s11, v10, v14 ; D2960008 043A140B > v_mul_f32_e32 v10, s17, v4 ; 10140811 > v_mul_f32_e32 v4, s25, v4 ; 10080819 > 
v_mac_f32_e32 v9, s12, v3 ; 3E12060C > v_mac_f32_e32 v10, s16, v3 ; 3E140610 > v_mac_f32_e32 v13, s20, v3 ; 3E1A0614 > v_mac_f32_e32 v4, s24, v3 ; 3E080618 > v_mac_f32_e32 v9, s14, v5 ; 3E120A0E > v_mac_f32_e32 v10, s18, v5 ; 3E140A12 > v_mac_f32_e32 v13, s22, v5 ; 3E1A0A16 > v_mac_f32_e32 v4, s26, v5 ; 3E080A1A > exp 15, 32, 0, 0, 0, v11, v12, v0, v0 ; F800020F 00000C0B > v_mac_f32_e32 v9, s15, v6 ; 3E120C0F > v_mac_f32_e32 v10, s19, v6 ; 3E140C13 > v_mac_f32_e32 v13, s23, v6 ; 3E1A0C17 > v_mac_f32_e32 v4, s0, v6 ; 3E080C00 > exp 15, 33, 0, 0, 0, v0, v1, v7, v8 ; F800021F 08070100 > exp 15, 12, 0, 1, 0, v9, v10, v13, v4 ; F80008CF 040D0A09 > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 32 >VGPRS: 16 >Code Size: 308 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v5, v2, 0, 0, [m0] ; C8140002 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v5, [v5], v3, 0, 0, [m0] ; C8150003 > v_interp_p1_f32 v6, v2, 1, 0, [m0] ; C8180102 > v_interp_p2_f32 v6, [v6], v3, 1, 0, [m0] ; C8190103 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v4, v2, 2, 1, [m0] ; C8100602 > v_interp_p2_f32 v4, [v4], v3, 2, 1, [m0] ; C8110603 > v_interp_p1_f32 v2, v2, 3, 1, [m0] ; C8080702 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v2, [v2], v3, 3, 1, [m0] ; C8090703 > image_sample v3, v[5:6], s[12:19], s[0:3] dmask:0x1 ; F0800100 00030305 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v3, v2, v3 ; 10060702 > v_mov_b32_e32 v2, v4 ; 7E040304 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 15 >Code Size: 112 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** >SHADER KEY > instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} > as_es = 0 > as_ls = 0 > export_prim_id = 0 >VERT >PROPERTY NEXT_SHADER 1 >DCL IN[0] >DCL IN[1] >DCL IN[2] >DCL IN[3] >DCL IN[4] >DCL OUT[0], POSITION >DCL OUT[1], GENERIC[0] >DCL OUT[2], GENERIC[1] >DCL OUT[3], GENERIC[2] >DCL OUT[4], GENERIC[3] >DCL CONST[1][0..35] >DCL CONST[2][0..4095] >DCL TEMP[0..20], LOCAL >DCL ADDR[0] >IMM[0] FLT32 { 255.0020, 2.0000, 1.0000, 0.0000} >IMM[1] INT32 {1, 2, 4, 0} >IMM[2] UINT32 {1, 16, 0, 272} >IMM[3] UINT32 {288, 304, 544, 352} >IMM[4] UINT32 {448, 320, 240, 512} >IMM[5] UINT32 {528, 336, 368, 480} >IMM[6] FLT32 { -0.1500, 0.0597, -1.5000, 0.0000} >IMM[7] UINT32 {400, 384, 464, 416} >IMM[8] FLT32 {158456325028528675187087900672.0000, 1.4427, 0.5000, 0.4545} >IMM[9] UINT32 {432, 0, 0, 0} >IMM[10] FLT32 { -0.0040, 6.2000, 1.7000, 0.0600} > 0: MUL TEMP[0].xyz, IN[4].zyxx, IMM[0].xxxx > 1: F2I TEMP[1].xyz, TEMP[0].xyzz > 2: SHL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].xxxx > 3: UMAD TEMP[3].xyz, TEMP[1].xyzz, IMM[1].yyyy, IMM[1].xxxx > 4: UMUL TEMP[4].x, TEMP[2].xxxx, IMM[2].yyyy > 5: USHR TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz > 6: UARL ADDR[0].x, 
TEMP[5].xxxx > 7: MOV TEMP[4].y, CONST[2][ADDR[0].x] > 8: MUL TEMP[4].x, IN[3].xxxx, TEMP[4].yyyy > 9: MOV TEMP[4].w, TEMP[4].xxxx > 10: UMUL TEMP[5].x, TEMP[2].yyyy, IMM[2].yyyy > 11: USHR TEMP[6].x, TEMP[5].xxxx, IMM[1].zzzz > 12: UARL ADDR[0].x, TEMP[6].xxxx > 13: MOV TEMP[5].y, CONST[2][ADDR[0].x] > 14: MUL TEMP[5].x, IN[3].yyyy, TEMP[5].yyyy > 15: MOV TEMP[5].w, TEMP[5].xxxx > 16: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy > 17: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz > 18: UARL ADDR[0].x, TEMP[7].xxxx > 19: MOV TEMP[6].x, CONST[2][ADDR[0].x] > 20: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 21: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 22: UARL ADDR[0].x, TEMP[8].xxxx > 23: MOV TEMP[7].w, CONST[2][ADDR[0].x] > 24: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].wwww > 25: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy > 26: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 27: UARL ADDR[0].x, TEMP[8].xxxx > 28: MOV TEMP[7].y, CONST[2][ADDR[0].x] > 29: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 30: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 31: UARL ADDR[0].x, TEMP[9].xxxx > 32: MOV TEMP[8].z, CONST[2][ADDR[0].x] > 33: FMA TEMP[7].x, TEMP[7].yyyy, TEMP[8].zzzz, -TEMP[6].xxxx > 34: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 35: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 36: UARL ADDR[0].x, TEMP[9].xxxx > 37: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 38: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 39: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 40: UARL ADDR[0].x, TEMP[10].xxxx > 41: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 42: FMA TEMP[6].x, TEMP[8].yyyy, TEMP[9].zzzz, TEMP[6].xxxx > 43: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].xxxx > 44: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].xxxx > 45: MUL TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy > 46: MOV TEMP[4].z, TEMP[7].xxxx > 47: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy > 48: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz > 49: UARL ADDR[0].x, TEMP[8].xxxx > 50: MOV TEMP[7].x, CONST[2][ADDR[0].x] > 51: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 52: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 53: UARL ADDR[0].x, TEMP[9].xxxx > 54: MOV TEMP[8].w, CONST[2][ADDR[0].x] > 55: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].wwww > 56: UMUL TEMP[8].x, TEMP[3].yyyy, IMM[2].yyyy > 57: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 58: UARL ADDR[0].x, TEMP[9].xxxx > 59: MOV TEMP[8].y, CONST[2][ADDR[0].x] > 60: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 61: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 62: UARL ADDR[0].x, TEMP[10].xxxx > 63: MOV TEMP[9].z, CONST[2][ADDR[0].x] > 64: FMA TEMP[8].x, TEMP[8].yyyy, TEMP[9].zzzz, -TEMP[7].xxxx > 65: UMUL TEMP[9].x, TEMP[3].yyyy, IMM[2].yyyy > 66: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 67: UARL ADDR[0].x, TEMP[10].xxxx > 68: MOV TEMP[9].y, CONST[2][ADDR[0].x] > 69: UMUL TEMP[10].x, TEMP[3].yyyy, IMM[2].yyyy > 70: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 71: UARL ADDR[0].x, TEMP[11].xxxx > 72: MOV TEMP[10].z, CONST[2][ADDR[0].x] > 73: FMA TEMP[7].x, TEMP[9].yyyy, TEMP[10].zzzz, TEMP[7].xxxx > 74: MUL TEMP[7].x, TEMP[7].xxxx, IN[3].yyyy > 75: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx > 76: MOV TEMP[7].y, TEMP[7].xxxx > 77: MUL TEMP[8].x, TEMP[8].xxxx, IN[3].yyyy > 78: MUL TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx > 79: MOV TEMP[5].z, TEMP[8].xxxx > 80: UMUL TEMP[8].x, TEMP[3].xxxx, IMM[2].yyyy > 81: USHR TEMP[9].x, TEMP[8].xxxx, IMM[1].zzzz > 82: UARL ADDR[0].x, TEMP[9].xxxx > 83: MOV TEMP[8].yz, CONST[2][ADDR[0].x] > 84: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 85: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 86: UARL ADDR[0].x, TEMP[10].xxxx > 87: MOV 
TEMP[9].xw, CONST[2][ADDR[0].x] > 88: MUL TEMP[8].xyz, TEMP[8].zzyy, TEMP[9].wxww > 89: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy > 90: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz > 91: UARL ADDR[0].x, TEMP[10].xxxx > 92: MOV TEMP[9].x, CONST[2][ADDR[0].x] > 93: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy > 94: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz > 95: UARL ADDR[0].x, TEMP[11].xxxx > 96: MOV TEMP[10].y, CONST[2][ADDR[0].x] > 97: FMA TEMP[9].x, TEMP[9].xxxx, TEMP[10].yyyy, TEMP[8].xxxx > 98: MUL TEMP[9].x, TEMP[9].xxxx, IN[3].xxxx > 99: MUL TEMP[4].x, IMM[0].yyyy, TEMP[9].xxxx >100: UMUL TEMP[9].x, TEMP[3].xxxx, IMM[2].yyyy >101: USHR TEMP[10].x, TEMP[9].xxxx, IMM[1].zzzz >102: UARL ADDR[0].x, TEMP[10].xxxx >103: MOV TEMP[9].xyz, CONST[2][ADDR[0].x] >104: UMUL TEMP[10].x, TEMP[3].xxxx, IMM[2].yyyy >105: USHR TEMP[11].x, TEMP[10].xxxx, IMM[1].zzzz >106: UARL ADDR[0].x, TEMP[11].xxxx >107: MOV TEMP[10].xyz, CONST[2][ADDR[0].x] >108: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xyzz >109: ADD TEMP[9].xyz, TEMP[9].zzyy, TEMP[9].yxxx >110: FMA TEMP[10].xyz, -TEMP[9].xyzz, IMM[0].yyyy, IMM[0].zzzz >111: MUL TEMP[11].x, IN[3].xxxx, TEMP[10].yyyy >112: MOV TEMP[4].y, TEMP[11].xxxx >113: UMUL TEMP[11].x, TEMP[3].yyyy, IMM[2].yyyy >114: USHR TEMP[12].x, TEMP[11].xxxx, IMM[1].zzzz >115: UARL ADDR[0].x, TEMP[12].xxxx >116: MOV TEMP[11].yz, CONST[2][ADDR[0].x] >117: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >118: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >119: UARL ADDR[0].x, TEMP[13].xxxx >120: MOV TEMP[12].xw, CONST[2][ADDR[0].x] >121: MUL TEMP[11].xyz, TEMP[11].zzyy, TEMP[12].wxww >122: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >123: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >124: UARL ADDR[0].x, TEMP[13].xxxx >125: MOV TEMP[12].x, CONST[2][ADDR[0].x] >126: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >127: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >128: UARL ADDR[0].x, TEMP[14].xxxx >129: MOV TEMP[13].y, CONST[2][ADDR[0].x] >130: FMA TEMP[12].x, TEMP[12].xxxx, TEMP[13].yyyy, TEMP[11].xxxx >131: MUL TEMP[12].x, TEMP[12].xxxx, IN[3].yyyy >132: MUL TEMP[5].x, IMM[0].yyyy, TEMP[12].xxxx >133: UMUL TEMP[12].x, TEMP[3].yyyy, IMM[2].yyyy >134: USHR TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz >135: UARL ADDR[0].x, TEMP[13].xxxx >136: MOV TEMP[12].xyz, CONST[2][ADDR[0].x] >137: UMUL TEMP[13].x, TEMP[3].yyyy, IMM[2].yyyy >138: USHR TEMP[14].x, TEMP[13].xxxx, IMM[1].zzzz >139: UARL ADDR[0].x, TEMP[14].xxxx >140: MOV TEMP[13].xyz, CONST[2][ADDR[0].x] >141: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz >142: ADD TEMP[12].xyz, TEMP[12].zzyy, TEMP[12].yxxx >143: FMA TEMP[13].xyz, -TEMP[12].xyzz, IMM[0].yyyy, IMM[0].zzzz >144: MUL TEMP[14].x, IN[3].yyyy, TEMP[13].yyyy >145: MOV TEMP[5].y, TEMP[14].xxxx >146: ADD TEMP[4], TEMP[4], TEMP[5] >147: UMUL TEMP[14].x, TEMP[2].zzzz, IMM[2].yyyy >148: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >149: UARL ADDR[0].x, TEMP[15].xxxx >150: MOV TEMP[14].y, CONST[2][ADDR[0].x] >151: MUL TEMP[14].x, IN[3].zzzz, TEMP[14].yyyy >152: MOV TEMP[5].w, TEMP[14].xxxx >153: UMUL TEMP[14].x, TEMP[3].zzzz, IMM[2].yyyy >154: USHR TEMP[15].x, TEMP[14].xxxx, IMM[1].zzzz >155: UARL ADDR[0].x, TEMP[15].xxxx >156: MOV TEMP[14].x, CONST[2][ADDR[0].x] >157: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >158: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >159: UARL ADDR[0].x, TEMP[16].xxxx >160: MOV TEMP[15].w, CONST[2][ADDR[0].x] >161: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[15].wwww >162: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >163: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >164: UARL ADDR[0].x, 
TEMP[16].xxxx >165: MOV TEMP[15].y, CONST[2][ADDR[0].x] >166: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >167: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >168: UARL ADDR[0].x, TEMP[17].xxxx >169: MOV TEMP[16].z, CONST[2][ADDR[0].x] >170: FMA TEMP[15].x, TEMP[15].yyyy, TEMP[16].zzzz, -TEMP[14].xxxx >171: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >172: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >173: UARL ADDR[0].x, TEMP[17].xxxx >174: MOV TEMP[16].y, CONST[2][ADDR[0].x] >175: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >176: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >177: UARL ADDR[0].x, TEMP[18].xxxx >178: MOV TEMP[17].z, CONST[2][ADDR[0].x] >179: FMA TEMP[14].x, TEMP[16].yyyy, TEMP[17].zzzz, TEMP[14].xxxx >180: MUL TEMP[14].x, TEMP[14].xxxx, IN[3].zzzz >181: MUL TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx >182: MOV TEMP[14].y, TEMP[14].xxxx >183: MUL TEMP[15].x, TEMP[15].xxxx, IN[3].zzzz >184: MUL TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx >185: MOV TEMP[5].z, TEMP[15].xxxx >186: UMUL TEMP[15].x, TEMP[3].zzzz, IMM[2].yyyy >187: USHR TEMP[16].x, TEMP[15].xxxx, IMM[1].zzzz >188: UARL ADDR[0].x, TEMP[16].xxxx >189: MOV TEMP[15].yz, CONST[2][ADDR[0].x] >190: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >191: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >192: UARL ADDR[0].x, TEMP[17].xxxx >193: MOV TEMP[16].xw, CONST[2][ADDR[0].x] >194: MUL TEMP[15].xyz, TEMP[15].zzyy, TEMP[16].wxww >195: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >196: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >197: UARL ADDR[0].x, TEMP[17].xxxx >198: MOV TEMP[16].x, CONST[2][ADDR[0].x] >199: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >200: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >201: UARL ADDR[0].x, TEMP[18].xxxx >202: MOV TEMP[17].y, CONST[2][ADDR[0].x] >203: FMA TEMP[16].x, TEMP[16].xxxx, TEMP[17].yyyy, TEMP[15].xxxx >204: MUL TEMP[16].x, TEMP[16].xxxx, IN[3].zzzz >205: MUL TEMP[5].x, IMM[0].yyyy, TEMP[16].xxxx >206: UMUL TEMP[16].x, TEMP[3].zzzz, IMM[2].yyyy >207: USHR TEMP[17].x, TEMP[16].xxxx, IMM[1].zzzz >208: UARL ADDR[0].x, TEMP[17].xxxx >209: MOV TEMP[16].xyz, CONST[2][ADDR[0].x] >210: UMUL TEMP[17].x, TEMP[3].zzzz, IMM[2].yyyy >211: USHR TEMP[18].x, TEMP[17].xxxx, IMM[1].zzzz >212: UARL ADDR[0].x, TEMP[18].xxxx >213: MOV TEMP[17].xyz, CONST[2][ADDR[0].x] >214: MUL TEMP[16].xyz, TEMP[16].xyzz, TEMP[17].xyzz >215: ADD TEMP[16].xyz, TEMP[16].zzyy, TEMP[16].yxxx >216: FMA TEMP[17].xyz, -TEMP[16].xyzz, IMM[0].yyyy, IMM[0].zzzz >217: MUL TEMP[18].x, IN[3].zzzz, TEMP[17].yyyy >218: MOV TEMP[5].y, TEMP[18].xxxx >219: ADD TEMP[4], TEMP[4], TEMP[5] >220: MOV TEMP[5].xyz, IN[0].xyzx >221: MOV TEMP[5].w, IMM[0].zzzz >222: DP4 TEMP[18].x, TEMP[4], TEMP[5] >223: MOV TEMP[4].y, TEMP[18].xxxx >224: UMUL TEMP[18].x, TEMP[3].xxxx, IMM[2].yyyy >225: USHR TEMP[19].x, TEMP[18].xxxx, IMM[1].zzzz >226: UARL ADDR[0].x, TEMP[19].xxxx >227: MOV TEMP[18].x, CONST[2][ADDR[0].x] >228: UMUL TEMP[19].x, TEMP[3].xxxx, IMM[2].yyyy >229: USHR TEMP[20].x, TEMP[19].xxxx, IMM[1].zzzz >230: UARL ADDR[0].x, TEMP[20].xxxx >231: MOV TEMP[19].z, CONST[2][ADDR[0].x] >232: FMA TEMP[18].x, TEMP[18].xxxx, TEMP[19].zzzz, -TEMP[8].zzzz >233: MUL TEMP[18].x, TEMP[18].xxxx, IN[3].xxxx >234: MUL TEMP[18].x, IMM[0].yyyy, TEMP[18].xxxx >235: MUL TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy >236: MOV TEMP[18].y, TEMP[6].xxxx >237: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >238: USHR TEMP[19].x, TEMP[6].xxxx, IMM[1].zzzz >239: UARL ADDR[0].x, TEMP[19].xxxx >240: MOV TEMP[6].x, CONST[2][ADDR[0].x] >241: UMUL TEMP[19].x, TEMP[3].yyyy, IMM[2].yyyy >242: USHR TEMP[20].x, 
TEMP[19].xxxx, IMM[1].zzzz >243: UARL ADDR[0].x, TEMP[20].xxxx >244: MOV TEMP[19].z, CONST[2][ADDR[0].x] >245: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[19].zzzz, -TEMP[11].zzzz >246: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].yyyy >247: MUL TEMP[7].x, IMM[0].yyyy, TEMP[6].xxxx >248: MUL TEMP[6].x, IN[3].xxxx, TEMP[10].zzzz >249: MOV TEMP[18].z, TEMP[6].xxxx >250: MUL TEMP[9].x, IN[3].xxxx, TEMP[10].xxxx >251: MUL TEMP[6].x, IN[3].yyyy, TEMP[13].zzzz >252: MOV TEMP[7].z, TEMP[6].xxxx >253: MUL TEMP[12].x, IN[3].yyyy, TEMP[13].xxxx >254: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >255: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >256: UARL ADDR[0].x, TEMP[10].xxxx >257: MOV TEMP[6].z, CONST[2][ADDR[0].x] >258: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].zzzz >259: MOV TEMP[18].w, TEMP[6].xxxx >260: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >261: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >262: UARL ADDR[0].x, TEMP[10].xxxx >263: MOV TEMP[6].z, CONST[2][ADDR[0].x] >264: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].zzzz >265: MOV TEMP[7].w, TEMP[6].xxxx >266: ADD TEMP[7], TEMP[7], TEMP[18] >267: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >268: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >269: UARL ADDR[0].x, TEMP[10].xxxx >270: MOV TEMP[6].x, CONST[2][ADDR[0].x] >271: UMUL TEMP[10].x, TEMP[3].zzzz, IMM[2].yyyy >272: USHR TEMP[13].x, TEMP[10].xxxx, IMM[1].zzzz >273: UARL ADDR[0].x, TEMP[13].xxxx >274: MOV TEMP[10].z, CONST[2][ADDR[0].x] >275: FMA TEMP[6].x, TEMP[6].xxxx, TEMP[10].zzzz, -TEMP[15].zzzz >276: MUL TEMP[6].x, TEMP[6].xxxx, IN[3].zzzz >277: MUL TEMP[14].x, IMM[0].yyyy, TEMP[6].xxxx >278: MUL TEMP[6].x, IN[3].zzzz, TEMP[17].zzzz >279: MOV TEMP[14].z, TEMP[6].xxxx >280: MUL TEMP[16].x, IN[3].zzzz, TEMP[17].xxxx >281: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >282: USHR TEMP[10].x, TEMP[6].xxxx, IMM[1].zzzz >283: UARL ADDR[0].x, TEMP[10].xxxx >284: MOV TEMP[6].z, CONST[2][ADDR[0].x] >285: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].zzzz >286: MOV TEMP[14].w, TEMP[6].xxxx >287: ADD TEMP[7], TEMP[7], TEMP[14] >288: DP4 TEMP[6].x, TEMP[7], TEMP[5] >289: MOV TEMP[4].z, TEMP[6].xxxx >290: UMUL TEMP[6].x, TEMP[2].xxxx, IMM[2].yyyy >291: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >292: UARL ADDR[0].x, TEMP[7].xxxx >293: MOV TEMP[6].x, CONST[2][ADDR[0].x] >294: MUL TEMP[6].x, IN[3].xxxx, TEMP[6].xxxx >295: MOV TEMP[9].w, TEMP[6].xxxx >296: UMUL TEMP[6].x, TEMP[2].yyyy, IMM[2].yyyy >297: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >298: UARL ADDR[0].x, TEMP[7].xxxx >299: MOV TEMP[6].x, CONST[2][ADDR[0].x] >300: MUL TEMP[6].x, IN[3].yyyy, TEMP[6].xxxx >301: MOV TEMP[12].w, TEMP[6].xxxx >302: UMUL TEMP[6].x, TEMP[2].zzzz, IMM[2].yyyy >303: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >304: UARL ADDR[0].x, TEMP[7].xxxx >305: MOV TEMP[6].x, CONST[2][ADDR[0].x] >306: MUL TEMP[6].x, IN[3].zzzz, TEMP[6].xxxx >307: MOV TEMP[16].w, TEMP[6].xxxx >308: UMUL TEMP[6].x, TEMP[3].xxxx, IMM[2].yyyy >309: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >310: UARL ADDR[0].x, TEMP[7].xxxx >311: MOV TEMP[6].x, CONST[2][ADDR[0].x] >312: UMUL TEMP[7].x, TEMP[3].xxxx, IMM[2].yyyy >313: USHR TEMP[10].x, TEMP[7].xxxx, IMM[1].zzzz >314: UARL ADDR[0].x, TEMP[10].xxxx >315: MOV TEMP[7].y, CONST[2][ADDR[0].x] >316: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[7].yyyy, -TEMP[8].xxxx >317: ADD TEMP[6].x, TEMP[8].zzzz, TEMP[8].yyyy >318: MOV TEMP[0].w, TEMP[6].xxxx >319: MUL TEMP[6].xy, TEMP[0].xwww, IN[3].xxxx >320: MUL TEMP[6].xy, IMM[0].yyyy, TEMP[6].xyyy >321: MOV TEMP[9].yz, TEMP[6].yxyy >322: UMUL TEMP[6].x, TEMP[3].yyyy, IMM[2].yyyy >323: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >324: 
UARL ADDR[0].x, TEMP[7].xxxx >325: MOV TEMP[6].x, CONST[2][ADDR[0].x] >326: UMUL TEMP[7].x, TEMP[3].yyyy, IMM[2].yyyy >327: USHR TEMP[8].x, TEMP[7].xxxx, IMM[1].zzzz >328: UARL ADDR[0].x, TEMP[8].xxxx >329: MOV TEMP[7].y, CONST[2][ADDR[0].x] >330: FMA TEMP[0].x, TEMP[6].xxxx, TEMP[7].yyyy, -TEMP[11].xxxx >331: UMUL TEMP[6].x, TEMP[3].zzzz, IMM[2].yyyy >332: USHR TEMP[7].x, TEMP[6].xxxx, IMM[1].zzzz >333: UARL ADDR[0].x, TEMP[7].xxxx >334: MOV TEMP[6].x, CONST[2][ADDR[0].x] >335: UMUL TEMP[3].x, TEMP[3].zzzz, IMM[2].yyyy >336: USHR TEMP[7].x, TEMP[3].xxxx, IMM[1].zzzz >337: UARL ADDR[0].x, TEMP[7].xxxx >338: MOV TEMP[3].y, CONST[2][ADDR[0].x] >339: FMA TEMP[3].x, TEMP[6].xxxx, TEMP[3].yyyy, -TEMP[15].xxxx >340: MOV TEMP[0].y, TEMP[3].xxxx >341: ADD TEMP[3].x, TEMP[15].zzzz, TEMP[15].yyyy >342: MOV TEMP[0].z, TEMP[3].xxxx >343: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[3].yzzz >344: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].yzzz >345: MOV TEMP[16].yz, TEMP[3].yxyy >346: ADD TEMP[3].x, TEMP[11].zzzz, TEMP[11].yyyy >347: MUL TEMP[3].x, TEMP[3].xxxx, IN[3].yyyy >348: MOV TEMP[0].y, TEMP[3].xxxx >349: MUL TEMP[3].xy, IMM[0].yyyy, TEMP[0].xyyy >350: MOV TEMP[12].yz, TEMP[3].yxyy >351: ADD TEMP[0], TEMP[9], TEMP[12] >352: ADD TEMP[0], TEMP[16], TEMP[0] >353: DP4 TEMP[4].x, TEMP[0], TEMP[5] >354: MOV TEMP[4].w, IMM[0].zzzz >355: DP4 TEMP[0].x, CONST[1][17], TEMP[4] >356: DP4 TEMP[3].x, CONST[1][18], TEMP[4] >357: MOV TEMP[0].y, TEMP[3].xxxx >358: DP4 TEMP[3].x, CONST[1][19], TEMP[4] >359: MOV TEMP[0].z, TEMP[3].xxxx >360: ADD TEMP[2].xyz, -TEMP[4].xyzz, CONST[1][34].xyzz >361: DP4 TEMP[3].x, CONST[1][22], TEMP[4] >362: ADD TEMP[3].x, TEMP[3].xxxx, CONST[1][28].wwww >363: DP4 TEMP[5].x, CONST[1][20], TEMP[4] >364: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][15].zzzz >365: MOV TEMP[0].w, TEMP[5].xxxx >366: MOV TEMP[6], TEMP[0] >367: MOV TEMP[7].xy, IN[1].xyxx >368: MUL TEMP[8].xyz, CONST[1][32].xyzz, CONST[1][33].xyzz >369: MOV TEMP[8].w, CONST[1][32].wwww >370: ABS TEMP[9].x, TEMP[5].xxxx >371: MUL TEMP[0].x, TEMP[9].xxxx, IMM[0].wwww >372: MIN TEMP[9].x, TEMP[0].xxxx, IMM[0].zzzz >373: ADD TEMP[0].x, -TEMP[9].xxxx, IMM[0].zzzz >374: DP3 TEMP[4].x, CONST[1][21].xyzz, TEMP[2].xyzz >375: DP3 TEMP[9].x, CONST[1][23].xyzz, TEMP[2].xyzz >376: MOV TEMP[4].z, TEMP[9].xxxx >377: DP3 TEMP[9].x, CONST[1][22].xyzz, TEMP[2].xyzz >378: MOV TEMP[4].y, TEMP[9].xxxx >379: DP3 TEMP[10].x, TEMP[4].xyzz, TEMP[4].xyzz >380: RSQ TEMP[10].x, TEMP[10].xxxx >381: MUL TEMP[11].xyz, TEMP[10].xxxx, TEMP[4].xyzz >382: FMA TEMP[9].x, -TEMP[9].xxxx, TEMP[10].xxxx, IMM[6].xxxx >383: ADD TEMP[9].x, -TEMP[9].xxxx, IMM[0].zzzz >384: MOV_SAT TEMP[9].x, TEMP[9].xxxx >385: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[9].xxxx >386: DP3 TEMP[10].x, -TEMP[11].xyzz, CONST[1][30].xyzz >387: FMA TEMP[1].x, -CONST[1][25].yyyy, TEMP[10].xxxx, CONST[1][25].xxxx >388: FMA TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx, IMM[0].zzzz >389: MUL TEMP[10].x, TEMP[10].xxxx, IMM[6].yyyy >390: ABS TEMP[11].x, TEMP[1].xxxx >391: LG2 TEMP[11].x, TEMP[11].xxxx >392: MUL TEMP[1].x, TEMP[11].xxxx, IMM[6].zzzz >393: EX2 TEMP[11].x, TEMP[1].xxxx >394: FMA TEMP[12].x, CONST[1][25].zzzz, TEMP[11].xxxx, -CONST[1][24].zzzz >395: MUL TEMP[1].x, TEMP[11].xxxx, CONST[1][25].zzzz >396: MAX TEMP[11].x, TEMP[12].xxxx, IMM[6].wwww >397: FMA TEMP[0].x, -TEMP[11].xxxx, TEMP[0].xxxx, TEMP[1].xxxx >398: MAX TEMP[11].x, TEMP[0].xxxx, CONST[1][29].wwww >399: FSNE TEMP[12].x, CONST[1][24].xxxx, IMM[6].wwww >400: UIF TEMP[12].xxxx :0 >401: RCP TEMP[12].x, CONST[1][24].xxxx >402: MUL TEMP[12].x, -TEMP[3].xxxx, 
TEMP[12].xxxx >403: ELSE :0 >404: SSG TEMP[13].x, -TEMP[3].xxxx >405: MUL TEMP[12].x, IMM[8].xxxx, TEMP[13].xxxx >406: ENDIF >407: MUL TEMP[1].x, TEMP[12].xxxx, IMM[8].yyyy >408: EX2 TEMP[12].x, TEMP[1].xxxx >409: ADD TEMP[1].x, TEMP[12].xxxx, CONST[1][25].wwww >410: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][26].yyyy >411: MUL TEMP[1].x, TEMP[1].xxxx, IMM[8].zzzz >412: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[1].xxxx >413: MIN TEMP[9].x, TEMP[9].xxxx, CONST[1][24].wwww >414: MAX TEMP[9].x, TEMP[9].xxxx, CONST[1][26].xxxx >415: MUL TEMP[0].x, TEMP[9].xxxx, TEMP[11].xxxx >416: FSNE TEMP[11].x, CONST[1][27].wwww, IMM[6].wwww >417: UIF TEMP[11].xxxx :0 >418: RCP TEMP[11].x, CONST[1][27].wwww >419: MUL TEMP[11].x, -TEMP[3].xxxx, TEMP[11].xxxx >420: ELSE :0 >421: SSG TEMP[12].x, -TEMP[3].xxxx >422: MUL TEMP[11].x, IMM[8].xxxx, TEMP[12].xxxx >423: ENDIF >424: ADD TEMP[3].x, -TEMP[3].xxxx, CONST[1][28].zzzz >425: FSNE TEMP[12].x, CONST[1][24].yyyy, IMM[6].wwww >426: UIF TEMP[12].xxxx :0 >427: RCP TEMP[12].x, CONST[1][24].yyyy >428: MUL TEMP[12].x, TEMP[3].xxxx, TEMP[12].xxxx >429: ELSE :0 >430: SSG TEMP[3].x, TEMP[3].xxxx >431: MUL TEMP[12].x, IMM[8].xxxx, TEMP[3].xxxx >432: ENDIF >433: MUL TEMP[1].x, TEMP[11].xxxx, IMM[8].yyyy >434: EX2 TEMP[3].x, TEMP[1].xxxx >435: MUL TEMP[2].xyz, TEMP[3].xxxx, CONST[1][27].xyzz >436: FMA TEMP[3].xyz, CONST[1][27].xyzz, TEMP[3].xxxx, TEMP[9].xxxx >437: FMA TEMP[9].xyz, TEMP[2].xyzz, TEMP[10].xxxx, TEMP[0].xxxx >438: FSEQ TEMP[10].xyz, TEMP[3].xyzz, IMM[6].wwww >439: SSG TEMP[11].xyz, TEMP[9].xyzz >440: MUL TEMP[11].xyz, IMM[8].xxxx, TEMP[11].xyzz >441: RCP TEMP[13].x, TEMP[3].xxxx >442: RCP TEMP[13].y, TEMP[3].yyyy >443: RCP TEMP[13].z, TEMP[3].zzzz >444: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xyzz >445: UCMP TEMP[9].xyz, TEMP[10].xyzz, TEMP[11].xyzz, TEMP[9].xyzz >446: MUL TEMP[2].xyz, TEMP[12].xxxx, -TEMP[3].xyzz >447: ABS TEMP[5].xyz, TEMP[5].xxxx >448: MUL TEMP[1].xyz, TEMP[5].xyzz, -TEMP[3].xyzz >449: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[8].yyyy >450: EX2 TEMP[3].x, TEMP[1].xxxx >451: EX2 TEMP[3].y, TEMP[1].yyyy >452: EX2 TEMP[3].z, TEMP[1].zzzz >453: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[8].yyyy >454: LG2 TEMP[5].x, CONST[1][29].xxxx >455: LG2 TEMP[5].y, CONST[1][29].yyyy >456: LG2 TEMP[5].z, CONST[1][29].zzzz >457: MUL TEMP[4].xyz, TEMP[5].xyzz, IMM[8].wwww >458: EX2 TEMP[5].x, TEMP[4].xxxx >459: EX2 TEMP[5].y, TEMP[4].yyyy >460: EX2 TEMP[5].z, TEMP[4].zzzz >461: EX2 TEMP[4].x, TEMP[2].xxxx >462: EX2 TEMP[4].y, TEMP[2].yyyy >463: EX2 TEMP[4].z, TEMP[2].zzzz >464: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[5].xyzz >465: MUL TEMP[0].xyz, TEMP[9].xyzz, TEMP[2].xyzz >466: ADD TEMP[4].xyz, -TEMP[3].xyzz, IMM[0].zzzz >467: MOV TEMP[3].w, TEMP[3].xxxx >468: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz, IMM[10].xxxx >469: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[6].wwww >470: FMA TEMP[4].xyz, TEMP[0].xyzz, IMM[10].yyyy, IMM[8].zzzz >471: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[4].xyzz >472: FMA TEMP[2].xyz, TEMP[0].xyzz, IMM[10].yyyy, IMM[10].zzzz >473: FMA TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz, IMM[10].wwww >474: FSEQ TEMP[2].xyz, TEMP[0].xyzz, IMM[6].wwww >475: SSG TEMP[4].xyz, TEMP[1].xyzz >476: MUL TEMP[4].xyz, IMM[8].xxxx, TEMP[4].xyzz >477: RCP TEMP[5].x, TEMP[0].xxxx >478: RCP TEMP[5].y, TEMP[0].yyyy >479: RCP TEMP[5].z, TEMP[0].zzzz >480: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[5].xyzz >481: UCMP TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz, TEMP[0].xyzz >482: MOV OUT[4], IN[2] >483: MOV OUT[3], TEMP[3] >484: MOV OUT[2], TEMP[8] >485: MOV OUT[1], TEMP[7] >486: MOV OUT[0], TEMP[6] >487: END 
>radeonsi: Compiling shader 399 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_vs <{ float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) { >main_body: > %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !tbaa !0 > %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 248) > %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 272) > %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 276) > %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 280) > %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 284) > %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 288) > %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 292) > %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 296) > %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 300) > %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 304) > %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 308) > %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 312) > %32 = call float @llvm.SI.load.const(<16 x i8> %19, i32 316) > %33 = call float @llvm.SI.load.const(<16 x i8> %19, i32 320) > %34 = call float @llvm.SI.load.const(<16 x i8> %19, i32 324) > %35 = call float @llvm.SI.load.const(<16 x i8> %19, i32 328) > %36 = call float @llvm.SI.load.const(<16 x i8> %19, i32 332) > %37 = call float @llvm.SI.load.const(<16 x i8> %19, i32 336) > %38 = call float @llvm.SI.load.const(<16 x i8> %19, i32 340) > %39 = call float @llvm.SI.load.const(<16 x i8> %19, i32 344) > %40 = call float @llvm.SI.load.const(<16 x i8> %19, i32 352) > %41 = call float @llvm.SI.load.const(<16 x i8> %19, i32 356) > %42 = call float @llvm.SI.load.const(<16 x i8> %19, i32 360) > %43 = call float @llvm.SI.load.const(<16 x i8> %19, i32 364) > %44 = call float @llvm.SI.load.const(<16 x i8> %19, i32 368) > %45 = call float @llvm.SI.load.const(<16 x i8> %19, i32 372) > %46 = call float @llvm.SI.load.const(<16 x i8> %19, i32 376) > %47 = call float @llvm.SI.load.const(<16 x i8> %19, i32 384) > %48 = call float @llvm.SI.load.const(<16 x i8> %19, i32 388) > %49 = call float @llvm.SI.load.const(<16 x i8> %19, i32 392) > %50 = call float @llvm.SI.load.const(<16 x i8> %19, i32 396) > %51 = call float @llvm.SI.load.const(<16 x i8> %19, i32 400) > %52 = call float @llvm.SI.load.const(<16 x i8> %19, i32 404) > %53 = call float @llvm.SI.load.const(<16 x i8> %19, i32 408) > %54 = call float @llvm.SI.load.const(<16 x i8> %19, i32 412) > %55 = call float @llvm.SI.load.const(<16 x i8> %19, i32 416) > %56 = call float @llvm.SI.load.const(<16 x i8> %19, i32 420) > %57 = call float @llvm.SI.load.const(<16 x i8> %19, i32 432) > %58 = call float @llvm.SI.load.const(<16 x i8> %19, i32 436) > %59 = call float @llvm.SI.load.const(<16 x i8> %19, i32 440) > %60 = call float @llvm.SI.load.const(<16 x i8> %19, i32 444) > %61 = call float @llvm.SI.load.const(<16 x i8> %19, i32 456) > %62 = call float @llvm.SI.load.const(<16 x i8> %19, i32 460) > %63 = call float @llvm.SI.load.const(<16 x i8> %19, i32 464) > %64 = call float @llvm.SI.load.const(<16 x i8> %19, i32 468) > %65 = call float @llvm.SI.load.const(<16 x i8> %19, 
i32 472) > %66 = call float @llvm.SI.load.const(<16 x i8> %19, i32 476) > %67 = call float @llvm.SI.load.const(<16 x i8> %19, i32 480) > %68 = call float @llvm.SI.load.const(<16 x i8> %19, i32 484) > %69 = call float @llvm.SI.load.const(<16 x i8> %19, i32 488) > %70 = call float @llvm.SI.load.const(<16 x i8> %19, i32 512) > %71 = call float @llvm.SI.load.const(<16 x i8> %19, i32 516) > %72 = call float @llvm.SI.load.const(<16 x i8> %19, i32 520) > %73 = call float @llvm.SI.load.const(<16 x i8> %19, i32 524) > %74 = call float @llvm.SI.load.const(<16 x i8> %19, i32 528) > %75 = call float @llvm.SI.load.const(<16 x i8> %19, i32 532) > %76 = call float @llvm.SI.load.const(<16 x i8> %19, i32 536) > %77 = call float @llvm.SI.load.const(<16 x i8> %19, i32 544) > %78 = call float @llvm.SI.load.const(<16 x i8> %19, i32 548) > %79 = call float @llvm.SI.load.const(<16 x i8> %19, i32 552) > %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 > %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 > %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0 > %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 > %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %13) > %85 = extractelement <4 x float> %84, i32 0 > %86 = extractelement <4 x float> %84, i32 1 > %87 = extractelement <4 x float> %84, i32 2 > %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1 > %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 > %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %14) > %91 = extractelement <4 x float> %90, i32 0 > %92 = extractelement <4 x float> %90, i32 1 > %93 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2 > %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 > %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %94, i32 0, i32 %15) > %96 = extractelement <4 x float> %95, i32 0 > %97 = extractelement <4 x float> %95, i32 1 > %98 = extractelement <4 x float> %95, i32 2 > %99 = extractelement <4 x float> %95, i32 3 > %100 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3 > %101 = load <16 x i8>, <16 x i8> addrspace(2)* %100, align 16, !tbaa !0 > %102 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %101, i32 0, i32 %16) > %103 = extractelement <4 x float> %102, i32 0 > %104 = extractelement <4 x float> %102, i32 1 > %105 = extractelement <4 x float> %102, i32 2 > %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4 > %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 > %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %17) > %109 = extractelement <4 x float> %108, i32 0 > %110 = extractelement <4 x float> %108, i32 1 > %111 = extractelement <4 x float> %108, i32 2 > %112 = fmul float %111, 0x406FE01000000000 > %113 = fmul float %110, 0x406FE01000000000 > %114 = fmul float %109, 0x406FE01000000000 > %115 = fptosi float %112 to i32 > %116 = fptosi float %113 to i32 > %117 = fptosi float %114 to i32 > %118 = shl i32 %115, 1 > %119 = or i32 %118, 1 > %120 = shl i32 %116, 1 > %121 = or i32 %120, 1 > %122 = shl i32 %117, 1 > %123 = or i32 %122, 1 > %124 = shl i32 %115, 5 > %125 = or i32 %124, 4 > %126 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %125) > %127 = fmul float %103, %126 > %128 = shl i32 %116, 5 > %129 = or i32 %128, 4 > %130 = call 
float @llvm.SI.load.const(<16 x i8> %81, i32 %129) > %131 = fmul float %104, %130 > %132 = shl i32 %119, 4 > %133 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %132) > %134 = shl i32 %119, 4 > %135 = or i32 %134, 12 > %136 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %135) > %137 = fmul float %133, %136 > %138 = shl i32 %119, 4 > %139 = or i32 %138, 4 > %140 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %139) > %141 = shl i32 %119, 4 > %142 = or i32 %141, 8 > %143 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %142) > %144 = fsub float -0.000000e+00, %137 > %145 = call float @llvm.fma.f32(float %140, float %143, float %144) > %146 = shl i32 %119, 4 > %147 = or i32 %146, 4 > %148 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %147) > %149 = shl i32 %119, 4 > %150 = or i32 %149, 8 > %151 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %150) > %152 = call float @llvm.fma.f32(float %148, float %151, float %137) > %153 = fmul float %152, %103 > %154 = fmul float %145, %103 > %155 = fmul float %154, 2.000000e+00 > %156 = shl i32 %121, 4 > %157 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %156) > %158 = shl i32 %121, 4 > %159 = or i32 %158, 12 > %160 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %159) > %161 = fmul float %157, %160 > %162 = shl i32 %121, 4 > %163 = or i32 %162, 4 > %164 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %163) > %165 = shl i32 %121, 4 > %166 = or i32 %165, 8 > %167 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %166) > %168 = fsub float -0.000000e+00, %161 > %169 = call float @llvm.fma.f32(float %164, float %167, float %168) > %170 = shl i32 %121, 4 > %171 = or i32 %170, 4 > %172 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %171) > %173 = shl i32 %121, 4 > %174 = or i32 %173, 8 > %175 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %174) > %176 = call float @llvm.fma.f32(float %172, float %175, float %161) > %177 = fmul float %176, %104 > %178 = fmul float %177, 2.000000e+00 > %179 = fmul float %169, %104 > %180 = fmul float %179, 2.000000e+00 > %181 = shl i32 %119, 4 > %182 = or i32 %181, 4 > %183 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %182) > %184 = shl i32 %119, 4 > %185 = or i32 %184, 8 > %186 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %185) > %187 = shl i32 %119, 4 > %188 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %187) > %189 = shl i32 %119, 4 > %190 = or i32 %189, 12 > %191 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %190) > %192 = fmul float %186, %191 > %193 = fmul float %186, %188 > %194 = fmul float %183, %191 > %195 = shl i32 %119, 4 > %196 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %195) > %197 = shl i32 %119, 4 > %198 = or i32 %197, 4 > %199 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %198) > %200 = call float @llvm.fma.f32(float %196, float %199, float %192) > %201 = fmul float %200, %103 > %202 = fmul float %201, 2.000000e+00 > %203 = shl i32 %119, 4 > %204 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %203) > %205 = shl i32 %119, 4 > %206 = or i32 %205, 4 > %207 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %206) > %208 = shl i32 %119, 4 > %209 = or i32 %208, 8 > %210 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %209) > %211 = shl i32 %119, 4 > %212 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %211) > %213 = shl i32 %119, 4 > %214 = or i32 %213, 4 > %215 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %214) > %216 = shl i32 %119, 4 > %217 = or i32 %216, 8 > %218 = call float @llvm.SI.load.const(<16 x 
i8> %81, i32 %217) > %219 = fmul float %204, %212 > %220 = fmul float %207, %215 > %221 = fmul float %210, %218 > %222 = fadd float %221, %220 > %223 = fadd float %221, %219 > %224 = fadd float %220, %219 > %225 = fsub float -0.000000e+00, %222 > %226 = call float @llvm.fma.f32(float %225, float 2.000000e+00, float 1.000000e+00) > %227 = fsub float -0.000000e+00, %223 > %228 = call float @llvm.fma.f32(float %227, float 2.000000e+00, float 1.000000e+00) > %229 = fsub float -0.000000e+00, %224 > %230 = call float @llvm.fma.f32(float %229, float 2.000000e+00, float 1.000000e+00) > %231 = fmul float %103, %228 > %232 = shl i32 %121, 4 > %233 = or i32 %232, 4 > %234 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %233) > %235 = shl i32 %121, 4 > %236 = or i32 %235, 8 > %237 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %236) > %238 = shl i32 %121, 4 > %239 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %238) > %240 = shl i32 %121, 4 > %241 = or i32 %240, 12 > %242 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %241) > %243 = fmul float %237, %242 > %244 = fmul float %237, %239 > %245 = fmul float %234, %242 > %246 = shl i32 %121, 4 > %247 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %246) > %248 = shl i32 %121, 4 > %249 = or i32 %248, 4 > %250 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %249) > %251 = call float @llvm.fma.f32(float %247, float %250, float %243) > %252 = fmul float %251, %104 > %253 = fmul float %252, 2.000000e+00 > %254 = shl i32 %121, 4 > %255 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %254) > %256 = shl i32 %121, 4 > %257 = or i32 %256, 4 > %258 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %257) > %259 = shl i32 %121, 4 > %260 = or i32 %259, 8 > %261 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %260) > %262 = shl i32 %121, 4 > %263 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %262) > %264 = shl i32 %121, 4 > %265 = or i32 %264, 4 > %266 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %265) > %267 = shl i32 %121, 4 > %268 = or i32 %267, 8 > %269 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %268) > %270 = fmul float %255, %263 > %271 = fmul float %258, %266 > %272 = fmul float %261, %269 > %273 = fadd float %272, %271 > %274 = fadd float %272, %270 > %275 = fadd float %271, %270 > %276 = fsub float -0.000000e+00, %273 > %277 = call float @llvm.fma.f32(float %276, float 2.000000e+00, float 1.000000e+00) > %278 = fsub float -0.000000e+00, %274 > %279 = call float @llvm.fma.f32(float %278, float 2.000000e+00, float 1.000000e+00) > %280 = fsub float -0.000000e+00, %275 > %281 = call float @llvm.fma.f32(float %280, float 2.000000e+00, float 1.000000e+00) > %282 = fmul float %104, %279 > %283 = fadd float %202, %253 > %284 = fadd float %231, %282 > %285 = fadd float %155, %180 > %286 = fadd float %127, %131 > %287 = shl i32 %117, 5 > %288 = or i32 %287, 4 > %289 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %288) > %290 = fmul float %105, %289 > %291 = shl i32 %123, 4 > %292 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %291) > %293 = shl i32 %123, 4 > %294 = or i32 %293, 12 > %295 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %294) > %296 = fmul float %292, %295 > %297 = shl i32 %123, 4 > %298 = or i32 %297, 4 > %299 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %298) > %300 = shl i32 %123, 4 > %301 = or i32 %300, 8 > %302 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %301) > %303 = fsub float -0.000000e+00, %296 > %304 = call float @llvm.fma.f32(float %299, float %302, float 
%303) > %305 = shl i32 %123, 4 > %306 = or i32 %305, 4 > %307 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %306) > %308 = shl i32 %123, 4 > %309 = or i32 %308, 8 > %310 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %309) > %311 = call float @llvm.fma.f32(float %307, float %310, float %296) > %312 = fmul float %311, %105 > %313 = fmul float %312, 2.000000e+00 > %314 = fmul float %304, %105 > %315 = fmul float %314, 2.000000e+00 > %316 = shl i32 %123, 4 > %317 = or i32 %316, 4 > %318 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %317) > %319 = shl i32 %123, 4 > %320 = or i32 %319, 8 > %321 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %320) > %322 = shl i32 %123, 4 > %323 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %322) > %324 = shl i32 %123, 4 > %325 = or i32 %324, 12 > %326 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %325) > %327 = fmul float %321, %326 > %328 = fmul float %321, %323 > %329 = fmul float %318, %326 > %330 = shl i32 %123, 4 > %331 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %330) > %332 = shl i32 %123, 4 > %333 = or i32 %332, 4 > %334 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %333) > %335 = call float @llvm.fma.f32(float %331, float %334, float %327) > %336 = fmul float %335, %105 > %337 = fmul float %336, 2.000000e+00 > %338 = shl i32 %123, 4 > %339 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %338) > %340 = shl i32 %123, 4 > %341 = or i32 %340, 4 > %342 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %341) > %343 = shl i32 %123, 4 > %344 = or i32 %343, 8 > %345 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %344) > %346 = shl i32 %123, 4 > %347 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %346) > %348 = shl i32 %123, 4 > %349 = or i32 %348, 4 > %350 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %349) > %351 = shl i32 %123, 4 > %352 = or i32 %351, 8 > %353 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %352) > %354 = fmul float %339, %347 > %355 = fmul float %342, %350 > %356 = fmul float %345, %353 > %357 = fadd float %356, %355 > %358 = fadd float %356, %354 > %359 = fadd float %355, %354 > %360 = fsub float -0.000000e+00, %357 > %361 = call float @llvm.fma.f32(float %360, float 2.000000e+00, float 1.000000e+00) > %362 = fsub float -0.000000e+00, %358 > %363 = call float @llvm.fma.f32(float %362, float 2.000000e+00, float 1.000000e+00) > %364 = fsub float -0.000000e+00, %359 > %365 = call float @llvm.fma.f32(float %364, float 2.000000e+00, float 1.000000e+00) > %366 = fmul float %105, %363 > %367 = fadd float %283, %337 > %368 = fadd float %284, %366 > %369 = fadd float %285, %315 > %370 = fadd float %286, %290 > %371 = fmul float %367, %85 > %372 = fmul float %368, %86 > %373 = fadd float %371, %372 > %374 = fmul float %369, %87 > %375 = fadd float %373, %374 > %376 = fadd float %375, %370 > %377 = shl i32 %119, 4 > %378 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %377) > %379 = shl i32 %119, 4 > %380 = or i32 %379, 8 > %381 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %380) > %382 = fsub float -0.000000e+00, %194 > %383 = call float @llvm.fma.f32(float %378, float %381, float %382) > %384 = fmul float %383, %103 > %385 = fmul float %384, 2.000000e+00 > %386 = fmul float %153, 2.000000e+00 > %387 = shl i32 %121, 4 > %388 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %387) > %389 = shl i32 %121, 4 > %390 = or i32 %389, 8 > %391 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %390) > %392 = fsub float -0.000000e+00, %245 > %393 = call float 
@llvm.fma.f32(float %388, float %391, float %392) > %394 = fmul float %393, %104 > %395 = fmul float %394, 2.000000e+00 > %396 = fmul float %103, %230 > %397 = fmul float %103, %226 > %398 = fmul float %104, %281 > %399 = fmul float %104, %277 > %400 = shl i32 %115, 5 > %401 = or i32 %400, 8 > %402 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %401) > %403 = fmul float %103, %402 > %404 = shl i32 %116, 5 > %405 = or i32 %404, 8 > %406 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %405) > %407 = fmul float %104, %406 > %408 = fadd float %395, %385 > %409 = fadd float %178, %386 > %410 = fadd float %398, %396 > %411 = fadd float %407, %403 > %412 = shl i32 %123, 4 > %413 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %412) > %414 = shl i32 %123, 4 > %415 = or i32 %414, 8 > %416 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %415) > %417 = fsub float -0.000000e+00, %329 > %418 = call float @llvm.fma.f32(float %413, float %416, float %417) > %419 = fmul float %418, %105 > %420 = fmul float %419, 2.000000e+00 > %421 = fmul float %105, %365 > %422 = fmul float %105, %361 > %423 = shl i32 %117, 5 > %424 = or i32 %423, 8 > %425 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %424) > %426 = fmul float %105, %425 > %427 = fadd float %408, %420 > %428 = fadd float %409, %313 > %429 = fadd float %410, %421 > %430 = fadd float %411, %426 > %431 = fmul float %427, %85 > %432 = fmul float %428, %86 > %433 = fadd float %431, %432 > %434 = fmul float %429, %87 > %435 = fadd float %433, %434 > %436 = fadd float %435, %430 > %437 = shl i32 %115, 5 > %438 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %437) > %439 = fmul float %103, %438 > %440 = shl i32 %116, 5 > %441 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %440) > %442 = fmul float %104, %441 > %443 = shl i32 %117, 5 > %444 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %443) > %445 = fmul float %105, %444 > %446 = shl i32 %119, 4 > %447 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %446) > %448 = shl i32 %119, 4 > %449 = or i32 %448, 4 > %450 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %449) > %451 = fsub float -0.000000e+00, %192 > %452 = call float @llvm.fma.f32(float %447, float %450, float %451) > %453 = fadd float %194, %193 > %454 = fmul float %452, %103 > %455 = fmul float %453, %103 > %456 = fmul float %454, 2.000000e+00 > %457 = fmul float %455, 2.000000e+00 > %458 = shl i32 %121, 4 > %459 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %458) > %460 = shl i32 %121, 4 > %461 = or i32 %460, 4 > %462 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %461) > %463 = fsub float -0.000000e+00, %243 > %464 = call float @llvm.fma.f32(float %459, float %462, float %463) > %465 = shl i32 %123, 4 > %466 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %465) > %467 = shl i32 %123, 4 > %468 = or i32 %467, 4 > %469 = call float @llvm.SI.load.const(<16 x i8> %81, i32 %468) > %470 = fsub float -0.000000e+00, %327 > %471 = call float @llvm.fma.f32(float %466, float %469, float %470) > %472 = fadd float %329, %328 > %473 = fmul float %464, %104 > %474 = fmul float %471, %105 > %475 = fmul float %472, %105 > %476 = fmul float %474, 2.000000e+00 > %477 = fmul float %475, 2.000000e+00 > %478 = fadd float %245, %244 > %479 = fmul float %478, %104 > %480 = fmul float %473, 2.000000e+00 > %481 = fmul float %479, 2.000000e+00 > %482 = fadd float %397, %399 > %483 = fadd float %456, %480 > %484 = fadd float %457, %481 > %485 = fadd float %439, %442 > %486 = fadd float %422, %482 > %487 = fadd float %476, 
%483 > %488 = fadd float %477, %484 > %489 = fadd float %445, %485 > %490 = fmul float %486, %85 > %491 = fmul float %487, %86 > %492 = fadd float %490, %491 > %493 = fmul float %488, %87 > %494 = fadd float %492, %493 > %495 = fadd float %494, %489 > %496 = fmul float %21, %495 > %497 = fmul float %22, %376 > %498 = fadd float %496, %497 > %499 = fmul float %23, %436 > %500 = fadd float %498, %499 > %501 = fadd float %500, %24 > %502 = fmul float %25, %495 > %503 = fmul float %26, %376 > %504 = fadd float %502, %503 > %505 = fmul float %27, %436 > %506 = fadd float %504, %505 > %507 = fadd float %506, %28 > %508 = fmul float %29, %495 > %509 = fmul float %30, %376 > %510 = fadd float %508, %509 > %511 = fmul float %31, %436 > %512 = fadd float %510, %511 > %513 = fadd float %512, %32 > %514 = fsub float %77, %495 > %515 = fsub float %78, %376 > %516 = fsub float %79, %436 > %517 = fmul float %40, %495 > %518 = fmul float %41, %376 > %519 = fadd float %517, %518 > %520 = fmul float %42, %436 > %521 = fadd float %519, %520 > %522 = fadd float %521, %43 > %523 = fadd float %522, %62 > %524 = fmul float %33, %495 > %525 = fmul float %34, %376 > %526 = fadd float %524, %525 > %527 = fmul float %35, %436 > %528 = fadd float %526, %527 > %529 = fadd float %528, %36 > %530 = fmul float %529, %20 > %531 = fmul float %70, %74 > %532 = fmul float %71, %75 > %533 = fmul float %72, %76 > %534 = call float @llvm.fabs.f32(float %530) > %535 = fmul float %534, 0x3EF4F8B580000000 > %536 = call float @llvm.minnum.f32(float %535, float 1.000000e+00) > %537 = fsub float 1.000000e+00, %536 > %538 = fmul float %37, %514 > %539 = fmul float %38, %515 > %540 = fadd float %539, %538 > %541 = fmul float %39, %516 > %542 = fadd float %540, %541 > %543 = fmul float %44, %514 > %544 = fmul float %45, %515 > %545 = fadd float %544, %543 > %546 = fmul float %46, %516 > %547 = fadd float %545, %546 > %548 = fmul float %40, %514 > %549 = fmul float %41, %515 > %550 = fadd float %549, %548 > %551 = fmul float %42, %516 > %552 = fadd float %550, %551 > %553 = fmul float %542, %542 > %554 = fmul float %552, %552 > %555 = fadd float %554, %553 > %556 = fmul float %547, %547 > %557 = fadd float %555, %556 > %558 = call float @llvm.AMDGPU.rsq.clamped.f32(float %557) > %559 = fmul float %558, %542 > %560 = fmul float %558, %552 > %561 = fmul float %558, %547 > %562 = fsub float -0.000000e+00, %552 > %563 = call float @llvm.fma.f32(float %562, float %558, float 0xBFC3333340000000) > %564 = fsub float 1.000000e+00, %563 > %565 = call float @llvm.AMDGPU.clamp.(float %564, float 0.000000e+00, float 1.000000e+00) > %566 = fmul float %565, %565 > %567 = fmul float %559, %67 > %568 = fsub float -0.000000e+00, %567 > %569 = fmul float %560, %68 > %570 = fsub float %568, %569 > %571 = fmul float %561, %69 > %572 = fsub float %570, %571 > %573 = fsub float -0.000000e+00, %52 > %574 = call float @llvm.fma.f32(float %573, float %572, float %51) > %575 = call float @llvm.fma.f32(float %572, float %572, float 1.000000e+00) > %576 = fmul float %575, 0x3FAE8EC8A0000000 > %577 = call float @llvm.fabs.f32(float %574) > %578 = call float @llvm.log2.f32(float %577) > %579 = fmul float %578, -1.500000e+00 > %580 = call float @llvm.exp2.f32(float %579) > %581 = fsub float -0.000000e+00, %49 > %582 = call float @llvm.fma.f32(float %53, float %580, float %581) > %583 = fmul float %580, %53 > %584 = call float @llvm.maxnum.f32(float %582, float 0.000000e+00) > %585 = fsub float -0.000000e+00, %584 > %586 = call float @llvm.fma.f32(float %585, float 
%537, float %583) > %587 = call float @llvm.maxnum.f32(float %586, float %66) > %588 = fcmp une float %47, 0.000000e+00 > br i1 %588, label %IF, label %ELSE > >IF: ; preds = %main_body > %589 = fdiv float 1.000000e+00, %47 > %590 = fmul float %523, %589 > %591 = fsub float -0.000000e+00, %590 > br label %ENDIF > >ELSE: ; preds = %main_body > %592 = fsub float -0.000000e+00, %523 > %593 = fcmp olt float %523, -0.000000e+00 > %594 = select i1 %593, float 1.000000e+00, float %592 > %595 = fcmp oge float %594, 0.000000e+00 > %.op = fmul float %594, 0x4600000000000000 > %596 = select i1 %595, float %.op, float 0xC600000000000000 > br label %ENDIF > >ENDIF: ; preds = %ELSE, %IF > %temp48.0 = phi float [ %591, %IF ], [ %596, %ELSE ] > %597 = fmul float %temp48.0, 0x3FF7154760000000 > %598 = call float @llvm.exp2.f32(float %597) > %599 = fadd float %598, %54 > %600 = fmul float %599, %56 > %601 = fmul float %600, 5.000000e-01 > %602 = fmul float %566, %601 > %603 = call float @llvm.minnum.f32(float %602, float %50) > %604 = call float @llvm.maxnum.f32(float %603, float %55) > %605 = fmul float %604, %587 > %606 = fcmp une float %60, 0.000000e+00 > br i1 %606, label %IF159, label %ELSE160 > >IF159: ; preds = %ENDIF > %607 = fdiv float 1.000000e+00, %60 > %608 = fmul float %523, %607 > %609 = fsub float -0.000000e+00, %608 > br label %ENDIF158 > >ELSE160: ; preds = %ENDIF > %610 = fsub float -0.000000e+00, %523 > %611 = fcmp olt float %523, -0.000000e+00 > %612 = select i1 %611, float 1.000000e+00, float %610 > %613 = fcmp oge float %612, 0.000000e+00 > %.op164 = fmul float %612, 0x4600000000000000 > %614 = select i1 %613, float %.op164, float 0xC600000000000000 > br label %ENDIF158 > >ENDIF158: ; preds = %ELSE160, %IF159 > %temp44.0 = phi float [ %609, %IF159 ], [ %614, %ELSE160 ] > %615 = fsub float %61, %523 > %616 = fcmp une float %48, 0.000000e+00 > br i1 %616, label %IF162, label %ELSE163 > >IF162: ; preds = %ENDIF158 > %617 = fdiv float 1.000000e+00, %48 > %618 = fmul float %615, %617 > br label %ENDIF161 > >ELSE163: ; preds = %ENDIF158 > %619 = fcmp ogt float %615, 0.000000e+00 > %620 = select i1 %619, float 1.000000e+00, float %615 > %621 = fcmp oge float %620, 0.000000e+00 > %.op165 = fmul float %620, 0x4600000000000000 > %622 = select i1 %621, float %.op165, float 0xC600000000000000 > br label %ENDIF161 > >ENDIF161: ; preds = %ELSE163, %IF162 > %temp48.1 = phi float [ %618, %IF162 ], [ %622, %ELSE163 ] > %623 = fmul float %temp44.0, 0x3FF7154760000000 > %624 = call float @llvm.exp2.f32(float %623) > %625 = fmul float %624, %57 > %626 = fmul float %624, %58 > %627 = fmul float %624, %59 > %628 = call float @llvm.fma.f32(float %57, float %624, float %604) > %629 = call float @llvm.fma.f32(float %58, float %624, float %604) > %630 = call float @llvm.fma.f32(float %59, float %624, float %604) > %631 = call float @llvm.fma.f32(float %625, float %576, float %605) > %632 = call float @llvm.fma.f32(float %626, float %576, float %605) > %633 = call float @llvm.fma.f32(float %627, float %576, float %605) > %634 = fcmp oeq float %628, 0.000000e+00 > %635 = fcmp oeq float %629, 0.000000e+00 > %636 = fcmp oeq float %630, 0.000000e+00 > %637 = fcmp ogt float %631, 0.000000e+00 > %638 = select i1 %637, float 1.000000e+00, float %631 > %639 = fcmp oge float %638, 0.000000e+00 > %640 = fcmp ogt float %632, 0.000000e+00 > %641 = select i1 %640, float 1.000000e+00, float %632 > %642 = fcmp oge float %641, 0.000000e+00 > %643 = fcmp ogt float %633, 0.000000e+00 > %644 = select i1 %643, float 1.000000e+00, 
float %633 > %645 = fcmp oge float %644, 0.000000e+00 > %.op166 = fmul float %638, 0x4600000000000000 > %646 = select i1 %639, float %.op166, float 0xC600000000000000 > %.op167 = fmul float %641, 0x4600000000000000 > %647 = select i1 %642, float %.op167, float 0xC600000000000000 > %.op168 = fmul float %644, 0x4600000000000000 > %648 = select i1 %645, float %.op168, float 0xC600000000000000 > %649 = fdiv float 1.000000e+00, %628 > %650 = fdiv float 1.000000e+00, %629 > %651 = fdiv float 1.000000e+00, %630 > %652 = fmul float %631, %649 > %653 = fmul float %632, %650 > %654 = fmul float %633, %651 > %655 = select i1 %634, float %646, float %652 > %656 = select i1 %635, float %647, float %653 > %657 = select i1 %636, float %648, float %654 > %658 = fmul float %628, %temp48.1 > %659 = fmul float %629, %temp48.1 > %660 = fmul float %630, %temp48.1 > %661 = call float @llvm.fabs.f32(float %530) > %662 = call float @llvm.fabs.f32(float %530) > %663 = call float @llvm.fabs.f32(float %530) > %664 = fmul float %628, %661 > %665 = fmul float %629, %662 > %666 = fmul float %630, %663 > %667 = fmul float %664, 0xBFF7154760000000 > %668 = fmul float %665, 0xBFF7154760000000 > %669 = fmul float %666, 0xBFF7154760000000 > %670 = call float @llvm.exp2.f32(float %667) > %671 = call float @llvm.exp2.f32(float %668) > %672 = call float @llvm.exp2.f32(float %669) > %673 = fmul float %658, 0xBFF7154760000000 > %674 = fmul float %659, 0xBFF7154760000000 > %675 = fmul float %660, 0xBFF7154760000000 > %676 = call float @llvm.log2.f32(float %63) > %677 = call float @llvm.log2.f32(float %64) > %678 = call float @llvm.log2.f32(float %65) > %679 = fmul float %676, 0x3FDD1745E0000000 > %680 = fmul float %677, 0x3FDD1745E0000000 > %681 = fmul float %678, 0x3FDD1745E0000000 > %682 = call float @llvm.exp2.f32(float %679) > %683 = call float @llvm.exp2.f32(float %680) > %684 = call float @llvm.exp2.f32(float %681) > %685 = call float @llvm.exp2.f32(float %673) > %686 = call float @llvm.exp2.f32(float %674) > %687 = call float @llvm.exp2.f32(float %675) > %688 = fmul float %685, %682 > %689 = fmul float %686, %683 > %690 = fmul float %687, %684 > %691 = fmul float %655, %688 > %692 = fmul float %656, %689 > %693 = fmul float %657, %690 > %694 = fsub float 1.000000e+00, %670 > %695 = fsub float 1.000000e+00, %671 > %696 = fsub float 1.000000e+00, %672 > %697 = call float @llvm.fma.f32(float %691, float %694, float 0xBF70624DE0000000) > %698 = call float @llvm.fma.f32(float %692, float %695, float 0xBF70624DE0000000) > %699 = call float @llvm.fma.f32(float %693, float %696, float 0xBF70624DE0000000) > %700 = call float @llvm.maxnum.f32(float %697, float 0.000000e+00) > %701 = call float @llvm.maxnum.f32(float %698, float 0.000000e+00) > %702 = call float @llvm.maxnum.f32(float %699, float 0.000000e+00) > %703 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 5.000000e-01) > %704 = call float @llvm.fma.f32(float %701, float 0x4018CCCCC0000000, float 5.000000e-01) > %705 = call float @llvm.fma.f32(float %702, float 0x4018CCCCC0000000, float 5.000000e-01) > %706 = fmul float %700, %703 > %707 = fmul float %701, %704 > %708 = fmul float %702, %705 > %709 = call float @llvm.fma.f32(float %700, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %710 = call float @llvm.fma.f32(float %701, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %711 = call float @llvm.fma.f32(float %702, float 0x4018CCCCC0000000, float 0x3FFB333340000000) > %712 = call float @llvm.fma.f32(float %700, float %709, float 
0x3FAEB851E0000000) > %713 = call float @llvm.fma.f32(float %701, float %710, float 0x3FAEB851E0000000) > %714 = call float @llvm.fma.f32(float %702, float %711, float 0x3FAEB851E0000000) > %715 = fcmp oeq float %712, 0.000000e+00 > %716 = fcmp oeq float %713, 0.000000e+00 > %717 = fcmp oeq float %714, 0.000000e+00 > %718 = fcmp ogt float %706, 0.000000e+00 > %719 = select i1 %718, float 1.000000e+00, float %706 > %720 = fcmp oge float %719, 0.000000e+00 > %721 = fcmp ogt float %707, 0.000000e+00 > %722 = select i1 %721, float 1.000000e+00, float %707 > %723 = fcmp oge float %722, 0.000000e+00 > %724 = fcmp ogt float %708, 0.000000e+00 > %725 = select i1 %724, float 1.000000e+00, float %708 > %726 = fcmp oge float %725, 0.000000e+00 > %.op169 = fmul float %719, 0x4600000000000000 > %727 = select i1 %720, float %.op169, float 0xC600000000000000 > %.op170 = fmul float %722, 0x4600000000000000 > %728 = select i1 %723, float %.op170, float 0xC600000000000000 > %.op171 = fmul float %725, 0x4600000000000000 > %729 = select i1 %726, float %.op171, float 0xC600000000000000 > %730 = fdiv float 1.000000e+00, %712 > %731 = fdiv float 1.000000e+00, %713 > %732 = fdiv float 1.000000e+00, %714 > %733 = fmul float %706, %730 > %734 = fmul float %707, %731 > %735 = fmul float %708, %732 > %736 = select i1 %715, float %727, float %733 > %737 = select i1 %716, float %728, float %734 > %738 = select i1 %717, float %729, float %735 > %739 = bitcast i32 %11 to float > %740 = insertvalue <{ float, float, float }> undef, float %739, 2 > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %91, float %92, float %429, float %430) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %531, float %532, float %533, float %73) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %736, float %737, float %738, float %670) > call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %96, float %97, float %98, float %99) > call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %501, float %507, float %513, float %530) > ret <{ float, float, float }> %740 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #0 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fma.f32(float, float, float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.fabs.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.minnum.f32(float, float) #0 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 > >; Function Attrs: readnone >declare float @llvm.AMDGPU.clamp.(float, float, float) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.log2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.exp2.f32(float) #0 > >; Function Attrs: nounwind readnone >declare float @llvm.maxnum.f32(float, float) #0 > >declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) > >attributes #0 = { nounwind readnone } >attributes #1 = { readnone } > >!0 = !{!"const", null, i32 1} > >SHADER KEY > prolog.color_two_side = 0 > prolog.poly_stipple = 0 > prolog.force_persample_interp = 0 > epilog.spi_shader_col_format = 0x0 > epilog.color_is_int8 = 0x0 > epilog.last_cbuf = 0 > epilog.alpha_func = 0 > epilog.alpha_to_one = 0 > epilog.poly_line_smoothing = 0 > epilog.clamp_color = 0 >FRAG >DCL IN[0], GENERIC[0], PERSPECTIVE >DCL 
IN[1], GENERIC[1], PERSPECTIVE >DCL IN[2], GENERIC[2], PERSPECTIVE >DCL IN[3], GENERIC[3], PERSPECTIVE >DCL OUT[0], COLOR >DCL SAMP[0] >DCL SVIEW[0], 2D, FLOAT >DCL CONST[1][0..31] >DCL TEMP[0..1], LOCAL >IMM[0] UINT32 {0, 496, 240, 0} > 0: MOV TEMP[0].xy, IN[0].xyyy > 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D > 2: MUL TEMP[0], TEMP[0], IN[3] > 3: MUL TEMP[0], TEMP[0], IN[1] > 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[2].wwww > 5: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0].xyzz > 6: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[1][31].xyzz > 7: MOV TEMP[0].xyz, TEMP[0].xyzx > 8: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1][15].wwww > 9: MOV TEMP[0].w, TEMP[1].xxxx > 10: MOV OUT[0], TEMP[0] > 11: END >radeonsi: Compiling shader 400 >TGSI shader LLVM IR: > >; ModuleID = 'tgsi' >source_filename = "tgsi" >target triple = "amdgcn--" > >define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([16 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { >main_body: > %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 > %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 > %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 252) > %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 496) > %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 500) > %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 504) > %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0 > %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 > %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* > %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3 > %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 > %34 = extractelement <8 x i32> %30, i32 7 > %35 = extractelement <4 x i32> %33, i32 0 > %36 = and i32 %35, %34 > %37 = insertelement <4 x i32> %33, i32 %36, i32 0 > %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) > %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) > %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) > %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) > %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) > %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) > %44 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) > %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) > %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) > %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) > %48 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) > %49 = bitcast float %38 to i32 > %50 = bitcast float %39 to i32 > %51 = insertelement <2 x i32> undef, i32 %49, i32 0 > %52 = insertelement <2 x i32> %51, i32 %50, i32 1 > %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) > %54 = extractelement <4 x float> %53, i32 0 > %55 = extractelement <4 x 
float> %53, i32 1 > %56 = extractelement <4 x float> %53, i32 2 > %57 = extractelement <4 x float> %53, i32 3 > %58 = fmul float %54, %45 > %59 = fmul float %55, %46 > %60 = fmul float %56, %47 > %61 = fmul float %57, %48 > %62 = fmul float %58, %40 > %63 = fmul float %59, %41 > %64 = fmul float %60, %42 > %65 = fmul float %61, %43 > %66 = fmul float %62, %44 > %67 = fmul float %63, %44 > %68 = fmul float %64, %44 > %69 = fmul float %65, %66 > %70 = fmul float %65, %67 > %71 = fmul float %65, %68 > %72 = fmul float %69, %26 > %73 = fmul float %70, %27 > %74 = fadd float %73, %72 > %75 = fmul float %71, %28 > %76 = fadd float %74, %75 > %77 = fmul float %76, %25 > %78 = bitcast float %5 to i32 > %79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %78, 10 > %80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %69, 11 > %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %70, 12 > %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %71, 13 > %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %77, 14 > %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %21, 24 > ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84 >} > >; Function Attrs: nounwind readnone >declare float @llvm.SI.load.const(<16 x i8>, i32) #1 > >; Function Attrs: nounwind readnone >declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 > >; Function Attrs: nounwind readnone >declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 > >attributes #0 = { "InitialPSInputAddr"="36983" } >attributes #1 = { nounwind readnone } > >!0 = !{!"const", null, i32 1} > > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00 > s_load_dwordx4 s[8:11], s[10:11], 0x4 ; C0840B04 > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[4:7], 0 idxen ; E00C2000 80010604 > buffer_load_format_xyzw v[10:13], v5, s[8:11], 0 idxen ; E00C2000 80020A05 > s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 > s_buffer_load_dword s17, s[0:3], 0xd ; C208810D > s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 > s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119 > s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D > s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 > s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 > s_buffer_load_dword s16, s[0:3], 0xc ; C208010C > s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 > s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C > 
s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 > s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 > s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 > s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 > s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 > s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 > s_buffer_load_dword s14, s[0:3], 0xa ; C207010A > s_buffer_load_dword s18, s[0:3], 0xe ; C209010E > s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 > s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A > s_buffer_load_dword s31, s[0:3], 0x16 ; C20F8116 > s_buffer_load_dword s34, s[0:3], 0x1e ; C211011E > s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 > s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 > s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 > s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 > s_buffer_load_dword s15, s[0:3], 0xb ; C207810B > s_buffer_load_dword s19, s[0:3], 0xf ; C209810F > s_buffer_load_dword s23, s[0:3], 0x13 ; C20B8113 > s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B > s_buffer_load_dword s32, s[0:3], 0x17 ; C2100117 > s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F > s_waitcnt lgkmcnt(0) ; BF8C007F > v_mov_b32_e32 v0, s4 ; 7E000204 > v_mov_b32_e32 v1, s5 ; 7E020205 > v_mov_b32_e32 v3, s6 ; 7E060206 > v_mov_b32_e32 v4, s7 ; 7E080207 > v_mov_b32_e32 v18, s11 ; 7E24020B > s_waitcnt vmcnt(1) ; BF8C0F71 > exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 > s_waitcnt vmcnt(0) ; BF8C0F70 > v_mul_f32_e32 v5, s24, v11 ; 100A1618 > v_mul_f32_e32 v14, s28, v11 ; 101C161C > v_mul_f32_e32 v15, s13, v11 ; 101E160D > v_mul_f32_e32 v16, s17, v11 ; 10201611 > v_mul_f32_e32 v17, s21, v11 ; 10221615 > v_mul_f32_e32 v11, s30, v11 ; 1016161E > v_mac_f32_e32 v5, s33, v10 ; 3E0A1421 > v_mac_f32_e32 v14, s27, v10 ; 3E1C141B > v_mac_f32_e32 v15, s12, v10 ; 3E1E140C > v_mac_f32_e32 v16, s16, v10 ; 3E201410 > v_mac_f32_e32 v17, s20, v10 ; 3E221414 > v_mac_f32_e32 v11, s29, v10 ; 3E16141D > v_mac_f32_e32 v5, s25, v12 ; 3E0A1819 > v_mac_f32_e32 v14, s34, v12 ; 3E1C1822 > v_mac_f32_e32 v15, s14, v12 ; 3E1E180E > v_mac_f32_e32 v16, s18, v12 ; 3E201812 > v_mac_f32_e32 v17, s22, v12 ; 3E221816 > v_mac_f32_e32 v11, s31, v12 ; 3E16181F > v_mac_f32_e32 v5, s26, v13 ; 3E0A1A1A > v_mac_f32_e32 v14, s0, v13 ; 3E1C1A00 > v_mac_f32_e32 v15, s15, v13 ; 3E1E1A0F > v_mac_f32_e32 v16, s19, v13 ; 3E201A13 > v_mac_f32_e32 v17, s23, v13 ; 3E221A17 > v_mac_f32_e32 v11, s32, v13 ; 3E161A20 > v_mov_b32_e32 v10, s8 ; 7E140208 > v_mov_b32_e32 v12, s9 ; 7E180209 > v_mov_b32_e32 v13, s10 ; 7E1A020A > exp 15, 33, 0, 0, 0, v0, v1, v3, v4 ; F800021F 04030100 > exp 15, 34, 0, 0, 0, v10, v12, v13, v18 ; F800022F 120D0C0A > exp 15, 35, 0, 0, 0, v5, v14, v0, v0 ; F800023F 00000E05 > exp 15, 12, 0, 1, 0, v15, v16, v17, v11 ; F80008CF 0B11100F > s_waitcnt expcnt(0) ; BF8C0F0F >Shader epilog disassembly: > s_endpgm ; BF810000 > >*** SHADER STATS *** >SGPRS: 40 >VGPRS: 20 >Code Size: 356 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Pixel Shader: >Shader main disassembly: > s_wqm_b64 exec, exec ; BEFE0A7E > s_mov_b32 m0, s11 ; BEFC030B > v_interp_p1_f32 v4, v2, 3, 0, [m0] ; C8100302 > v_interp_p2_f32 v4, [v4], v3, 3, 0, [m0] ; C8110303 > v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 > v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 > v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 > v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 > v_interp_p1_f32 v5, v2, 2, 1, [m0] ; C8140602 > v_interp_p2_f32 v5, [v5], v3, 2, 1, [m0] ; C8150603 > v_interp_p1_f32 v6, v2, 3, 1, [m0] ; C8180702 > 
v_interp_p2_f32 v6, [v6], v3, 3, 1, [m0] ; C8190703 > v_interp_p1_f32 v7, v2, 0, 2, [m0] ; C81C0802 > v_interp_p2_f32 v7, [v7], v3, 0, 2, [m0] ; C81D0803 > v_interp_p1_f32 v8, v2, 1, 2, [m0] ; C8200902 > s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 > s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C > v_interp_p2_f32 v8, [v8], v3, 1, 2, [m0] ; C8210903 > v_interp_p1_f32 v9, v2, 2, 2, [m0] ; C8240A02 > v_interp_p2_f32 v9, [v9], v3, 2, 2, [m0] ; C8250A03 > v_interp_p1_f32 v10, v2, 3, 2, [m0] ; C8280B02 > v_interp_p2_f32 v10, [v10], v3, 3, 2, [m0] ; C8290B03 > v_interp_p1_f32 v11, v2, 0, 3, [m0] ; C82C0C02 > v_interp_p2_f32 v11, [v11], v3, 0, 3, [m0] ; C82D0C03 > v_interp_p1_f32 v12, v2, 1, 3, [m0] ; C8300D02 > s_waitcnt lgkmcnt(0) ; BF8C007F > s_and_b32 s0, s0, s19 ; 87001300 > v_interp_p2_f32 v12, [v12], v3, 1, 3, [m0] ; C8310D03 > image_sample v[14:17], v[11:12], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030E0B > s_waitcnt vmcnt(0) ; BF8C0F70 > v_fma_f32 v3, v17, v10, v6 ; D2960003 041A1511 > v_fma_f32 v0, v14, v7, v0 ; D2960000 04020F0E > v_fma_f32 v1, v15, v8, v1 ; D2960001 0406110F > v_fma_f32 v2, v16, v9, v5 ; D2960002 04161310 > v_mul_f32_e32 v3, v4, v3 ; 10060704 >Shader epilog disassembly: > v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 > v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 > exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 > s_endpgm ; BF810000 > >*** SHADER CONFIG *** >SPI_PS_INPUT_ADDR = 0xd077 >SPI_PS_INPUT_ENA = 0x0002 >*** SHADER STATS *** >SGPRS: 24 >VGPRS: 20 >Code Size: 180 bytes >LDS: 0 blocks >Scratch: 0 bytes per wave >Max Waves: 10 >******************** > >Vertex Shader as VS: >Shader prolog disassembly: > v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C > v_mov_b32_e32 v5, v4 ; 7E0A0304 >Shader main disassembly: > s_load_dwordx4 s[12:15], s[10:11], 0x0 ; C0860B00 > v_mov_b32_e32 v0, 0x3dcccccd ; 7E0002FF 3DCCCCCD > v_mov_b32_e32 v1, 0x437f028f ; 7E0202FF 437F028F > v_mov_b32_e32 v3, 0x40066666 ; 7E0602FF 40066666 > v_mov_b32_e32 v10, 0x40466666 ; 7E1402FF 40466666 > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[6:9], v4, s[12:15], 0 idxen ; E00C2000 80030604 > v_mov_b32_e32 v4, 0x447f028f ; 7E0802FF 447F028F > v_mov_b32_e32 v11, 0x3f8ccccd ; 7E1602FF 3F8CCCCD > s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 > s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 > s_movk_i32 s8, 0x604 ; B0080604 > s_movk_i32 s9, 0x600 ; B0090600 > s_movk_i32 s10, 0x608 ; B00A0608 > s_movk_i32 s11, 0x60c ; B00B060C > s_waitcnt lgkmcnt(0) ; BF8C007F > buffer_load_format_xyzw v[29:32], v5, s[4:7], 0 idxen ; E00C2000 80011D05 > s_waitcnt vmcnt(1) ; BF8C0F71 > v_fma_f32 v1, v1, v8, v0 ; D2960001 04021101 > v_cvt_u32_f32_e32 v1, v1 ; 7E020F01 > v_fma_f32 v0, v8, v4, v0 ; D2960000 04020908 > v_fma_f32 v3, v4, v8, v3 ; D2960003 040E1104 > v_fma_f32 v10, v4, v8, v10 ; D296000A 042A1104 > v_fma_f32 v4, v4, v8, v11 ; D2960004 042E1104 > v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 > v_cvt_u32_f32_e32 v4, v4 ; 7E080F04 > v_cvt_u32_f32_e32 v3, v3 ; 7E060F03 > v_cvt_u32_f32_e32 v10, v10 ; 7E140F0A > v_lshlrev_b32_e32 v11, 6, v1 ; 34160286 > v_lshlrev_b32_e32 v1, 2, v1 ; 34020282 > v_and_b32_e32 v1, 0xffffffc, v1 ; 360202FF 0FFFFFFC > v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 > v_or_b32_e32 v27, 1, v1 ; 38360281 > v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 > v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 > v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 > v_or_b32_e32 v28, 2, v1 ; 38380282 > v_lshlrev_b32_e32 v27, 4, v27 ; 34363684 > v_add_i32_e32 v12, vcc, s9, v0 ; 4A180009 > v_add_i32_e32 v13, vcc, s8, v0 ; 4A1A0008 